@@ -160,14 +160,6 @@ groups:
160160 annotations:
161161 description: File descriptors limit at {{ $labels.instance }} is currently at {{ printf "%.2f" $value }}%.
162162 summary: Kernel is predicted to exhaust file descriptors limit soon.
163- - alert: KubeSchedulerDown
164- expr: absent(up{job="kube-scheduler"} == 1)
165- for: 15m
166- labels:
167- severity: critical
168- annotations:
169- description: KubeScheduler has disappeared from Prometheus target discovery.
170- summary: Target disappeared from Prometheus target discovery.
171163 - name: infra-alerts-02
172164 rules:
173165 - alert: KubeNodeNotReady
@@ -270,14 +262,6 @@ groups:
270262 annotations:
271263 description: Kubelet has disappeared from Prometheus target discovery.
272264 summary: Target disappeared from Prometheus target discovery.
273- - alert: KubeProxyDown
274- expr: absent(up{job="kube-proxy"} == 1)
275- for: 15m
276- labels:
277- severity: critical
278- annotations:
279- description: KubeProxy has disappeared from Prometheus target discovery.
280- summary: Target disappeared from Prometheus target discovery.
281265 - alert: KubeVersionMismatch
282266 expr: count by(cluster) (count by(git_version, cluster) (label_replace(kubernetes_build_info{job!~"kube-dns|coredns"}, "git_version", "$1", "git_version", "(v[0-9]*.[0-9]*).*"))) > 1
283267 for: 15m
@@ -294,14 +278,6 @@ groups:
294278 annotations:
295279 description: Kubernetes API server client '{{ $labels.job }}/{{ $labels.instance }}' is experiencing {{ $value | humanizePercentage }} errors.'
296280 summary: Kubernetes API server client is experiencing errors.
297- - alert: KubeControllerManagerDown
298- expr: absent(up{job="kube-controller-manager"} == 1)
299- for: 15m
300- labels:
301- severity: critical
302- annotations:
303- description: KubeControllerManager has disappeared from Prometheus target discovery.
304- summary: Target disappeared from Prometheus target discovery.
305281 - alert: KubeClientCertificateExpiration
306282 expr: apiserver_client_certificate_expiration_seconds_count{job="apiserver"} > 0 and on(job) histogram_quantile(0.01, sum by(job, le) (rate(apiserver_client_certificate_expiration_seconds_bucket{job="apiserver"}[5m]))) < 604800
307283 labels:
@@ -334,7 +310,7 @@ groups:
334310 description: Kubernetes aggregated API {{ $labels.name }}/{{ $labels.namespace }} has been only {{ $value | humanize }}% available over the last 10m.
335311 summary: Kubernetes aggregated API is down.
336312 - alert: KubeAPIDown
337- expr: absent(up{job="apiserver "} == 1)
313+ expr: absent(up{job="kube-admin "} == 1)
338314 for: 15m
339315 labels:
340316 severity: critical
0 commit comments