Skip to content

Commit f413e5c

Browse files
committed
fix gke monitoring.
1 parent ba05c43 commit f413e5c

File tree

4 files changed

+79
-3
lines changed

4 files changed

+79
-3
lines changed

config/charts/inferencepool/README.md

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -133,7 +133,7 @@ inferenceExtension:
133133

134134
**Note:** Prometheus monitoring requires the Prometheus Operator and ServiceMonitor CRD to be installed in the cluster.
135135

136-
For GKE environments, monitoring is automatically configured when `provider.name` is set to `gke`.
136+
For GKE environments, monitoring is enabled by setting `provider.name` to `gke` and `inferenceExtension.monitoring.gke.enabled` to `true`. This creates the `ClusterPodMonitoring` resource together with the ServiceAccount, service-account token Secret, and RBAC resources required for metrics collection.
137137

138138
Then apply it with:
139139

@@ -174,6 +174,7 @@ The following table lists the configurable parameters of the chart.
174174
| `inferenceExtension.monitoring.interval` | Metrics scraping interval for monitoring. Defaults to `10s`. |
175175
| `inferenceExtension.monitoring.secret.name` | Name of the service account token secret for metrics authentication. Defaults to `inference-gateway-sa-metrics-reader-secret`. |
176176
| `inferenceExtension.monitoring.prometheus.enabled` | Enable Prometheus ServiceMonitor creation for EPP metrics collection. Defaults to `false`. |
177+
| `inferenceExtension.monitoring.gke.enabled` | Enable GKE monitoring resources (`ClusterPodMonitoring` and RBAC). Defaults to `false`. |
177178
| `inferenceExtension.pluginsCustomConfig` | Custom config that is passed to EPP as inline yaml. |
178179
| `provider.name` | Name of the Inference Gateway implementation being used. Possible values: `gke`. Defaults to `none`. |
179180

config/charts/inferencepool/templates/epp-sa-token-secret.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
{{- if or .Values.inferenceExtension.monitoring.prometheus.enabled .Values.inferenceExtension.monitoring.gke.enabled }}
1+
{{- if or .Values.inferenceExtension.monitoring.prometheus.enabled }}
22
apiVersion: v1
33
kind: Secret
44
metadata:

config/charts/inferencepool/templates/gke.yaml

Lines changed: 73 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,30 @@ spec:
3535
timeoutSec: 300 # 5-minute timeout (adjust as needed)
3636
logging:
3737
enabled: true # log all requests by default
38+
{{- if .Values.inferenceExtension.monitoring.gke.enabled }}
39+
{{- $saName := printf "%s-metrics-reader-sa" .Release.Name -}}
40+
{{- $secretName := printf "%s-metrics-reader-secret" .Release.Name -}}
41+
{{- $clusterRoleName := printf "%s-%s-metrics-reader" .Release.Namespace .Release.Name -}}
42+
{{- $clusterRoleBindingName := printf "%s-%s-metrics-reader-role-binding" .Release.Namespace .Release.Name -}}
43+
{{- $secretReadClusterRoleName := printf "%s-%s-metrics-reader-secret-read" .Release.Namespace .Release.Name -}}
44+
{{- $gmpCollectorRoleBindingName := printf "gmp-system:collector:%s-%s-metrics-reader-secret-read" .Release.Namespace .Release.Name -}}
45+
---
46+
apiVersion: v1
47+
kind: ServiceAccount
48+
metadata:
49+
name: {{ $saName }}
50+
namespace: {{ .Release.Namespace }}
51+
---
52+
apiVersion: v1
53+
kind: Secret
54+
metadata:
55+
name: {{ $secretName }}
56+
namespace: {{ .Release.Namespace }}
57+
labels:
58+
{{- include "gateway-api-inference-extension.labels" . | nindent 4 }}
59+
annotations:
60+
kubernetes.io/service-account.name: {{ $saName }}
61+
type: kubernetes.io/service-account-token
3862
---
3963
apiVersion: monitoring.googleapis.com/v1
4064
kind: ClusterPodMonitoring
@@ -52,10 +76,58 @@ spec:
5276
type: Bearer
5377
credentials:
5478
secret:
55-
name: {{ .Values.inferenceExtension.monitoring.secret.name }}
79+
name: {{ $secretName }}
5680
key: token
5781
namespace: {{ .Release.Namespace }}
5882
selector:
5983
matchLabels:
6084
{{- include "gateway-api-inference-extension.selectorLabels" . | nindent 8 }}
85+
---
86+
apiVersion: rbac.authorization.k8s.io/v1
87+
kind: ClusterRole
88+
metadata:
89+
name: {{ $clusterRoleName }}
90+
rules:
91+
- nonResourceURLs:
92+
- /metrics
93+
verbs:
94+
- get
95+
---
96+
apiVersion: rbac.authorization.k8s.io/v1
97+
kind: ClusterRoleBinding
98+
metadata:
99+
name: {{ $clusterRoleBindingName }}
100+
subjects:
101+
- kind: ServiceAccount
102+
name: {{ $saName }}
103+
namespace: {{ .Release.Namespace }}
104+
roleRef:
105+
kind: ClusterRole
106+
name: {{ $clusterRoleName }}
107+
apiGroup: rbac.authorization.k8s.io
108+
---
109+
apiVersion: rbac.authorization.k8s.io/v1
110+
kind: ClusterRole
111+
metadata:
112+
name: {{ $secretReadClusterRoleName }}
113+
rules:
114+
- resources:
115+
- secrets
116+
apiGroups: [""]
117+
verbs: ["get", "list", "watch"]
118+
resourceNames: [{{ $secretName | quote }}]
119+
---
120+
apiVersion: rbac.authorization.k8s.io/v1
121+
kind: ClusterRoleBinding
122+
metadata:
123+
name: {{ $gmpCollectorRoleBindingName }}
124+
roleRef:
125+
name: {{ $secretReadClusterRoleName }}
126+
kind: ClusterRole
127+
apiGroup: rbac.authorization.k8s.io
128+
subjects:
129+
- name: collector
130+
namespace: gmp-system
131+
kind: ServiceAccount
132+
{{- end }}
61133
{{- end }}

config/charts/inferencepool/values.yaml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,9 @@ inferenceExtension:
5050
# Prometheus ServiceMonitor will be created when enabled for EPP metrics collection
5151
prometheus:
5252
enabled: false
53+
54+
gke:
55+
enabled: false
5356

5457
inferencePool:
5558
targetPorts:

0 commit comments

Comments
 (0)