@@ -160,14 +160,6 @@ groups:
160
160
annotations:
161
161
description: File descriptors limit at {{ $labels.instance }} is currently at {{ printf "%.2f" $value }}%.
162
162
summary: Kernel is predicted to exhaust file descriptors limit soon.
163
- - alert: KubeSchedulerDown
164
- expr: absent(up{job="kube-scheduler"} == 1)
165
- for: 15m
166
- labels:
167
- severity: critical
168
- annotations:
169
- description: KubeScheduler has disappeared from Prometheus target discovery.
170
- summary: Target disappeared from Prometheus target discovery.
171
163
- name: infra-alerts-02
172
164
rules:
173
165
- alert: KubeNodeNotReady
@@ -270,14 +262,6 @@ groups:
270
262
annotations:
271
263
description: Kubelet has disappeared from Prometheus target discovery.
272
264
summary: Target disappeared from Prometheus target discovery.
273
- - alert: KubeProxyDown
274
- expr: absent(up{job="kube-proxy"} == 1)
275
- for: 15m
276
- labels:
277
- severity: critical
278
- annotations:
279
- description: KubeProxy has disappeared from Prometheus target discovery.
280
- summary: Target disappeared from Prometheus target discovery.
281
265
- alert: KubeVersionMismatch
282
266
expr: count by(cluster) (count by(git_version, cluster) (label_replace(kubernetes_build_info{job!~"kube-dns|coredns"}, "git_version", "$1", "git_version", "(v[0-9]*.[0-9]*).*"))) > 1
283
267
for: 15m
@@ -294,14 +278,6 @@ groups:
294
278
annotations:
295
279
description: Kubernetes API server client '{{ $labels.job }}/{{ $labels.instance }}' is experiencing {{ $value | humanizePercentage }} errors.'
296
280
summary: Kubernetes API server client is experiencing errors.
297
- - alert: KubeControllerManagerDown
298
- expr: absent(up{job="kube-controller-manager"} == 1)
299
- for: 15m
300
- labels:
301
- severity: critical
302
- annotations:
303
- description: KubeControllerManager has disappeared from Prometheus target discovery.
304
- summary: Target disappeared from Prometheus target discovery.
305
281
- alert: KubeClientCertificateExpiration
306
282
expr: apiserver_client_certificate_expiration_seconds_count{job="apiserver"} > 0 and on(job) histogram_quantile(0.01, sum by(job, le) (rate(apiserver_client_certificate_expiration_seconds_bucket{job="apiserver"}[5m]))) < 604800
307
283
labels:
@@ -334,7 +310,7 @@ groups:
334
310
description: Kubernetes aggregated API {{ $labels.name }}/{{ $labels.namespace }} has been only {{ $value | humanize }}% available over the last 10m.
335
311
summary: Kubernetes aggregated API is down.
336
312
- alert: KubeAPIDown
337
- expr: absent(up{job="apiserver"} == 1)
313
+ expr: absent(up{job="kube-admin"} == 1)
338
314
for: 15m
339
315
labels:
340
316
severity: critical
0 commit comments