Skip to content

Commit 20a2b05

Browse files
committed
Fix missing metrics
1 parent b2e6311 commit 20a2b05

15 files changed

+18988
-19000
lines changed

modules/workloads/infra/alerts.tf

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -211,7 +211,7 @@ groups:
211211
description: The Kubelet Pod Lifecycle Event Generator has a 99th percentile duration of {{ $value }} seconds on node {{ $labels.node }}.
212212
summary: Kubelet Pod Lifecycle Event Generator is taking too long to relist.
213213
- alert: KubeletPodStartUpLatencyHigh
214-
expr: histogram_quantile(0.99, sum by(cluster, instance, le) (rate(kubelet_pod_worker_duration_seconds_bucket{job="kubelet",metrics_path="/metrics"}[5m]))) * on(cluster, instance) group_left(node) kubelet_node_name{job="kubelet",metrics_path="/metrics"} > 60
214+
expr: histogram_quantile(0.99, sum by(cluster, instance, le) (rate(kubelet_pod_worker_duration_seconds_bucket{job="kubelet"}[5m]))) * on(cluster, instance) group_left(node) kubelet_node_name{job="kubelet"} > 60
215215
for: 15m
216216
labels:
217217
severity: warning
@@ -263,7 +263,7 @@ groups:
263263
description: Kubelet on node {{ $labels.node }} has failed to renew its server certificate ({{ $value | humanize }} errors in the last 5 minutes).
264264
summary: Kubelet has failed to renew its server certificate.
265265
- alert: KubeletDown
266-
expr: absent(up{job="kubelet",metrics_path="/metrics"} == 1)
266+
expr: absent(up{job="kubelet"} == 1)
267267
for: 15m
268268
labels:
269269
severity: critical
@@ -350,31 +350,31 @@ groups:
350350
description: The kubernetes apiserver has terminated {{ $value | humanizePercentage }} of its incoming requests.
351351
summary: The kubernetes apiserver has terminated {{ $value | humanizePercentage }} of its incoming requests.
352352
- alert: KubePersistentVolumeFillingUp
353-
expr: (kubelet_volume_stats_available_bytes{job="kubelet",metrics_path="/metrics",namespace=~".*"} / kubelet_volume_stats_capacity_bytes{job="kubelet",metrics_path="/metrics",namespace=~".*"}) < 0.03 and kubelet_volume_stats_used_bytes{job="kubelet",metrics_path="/metrics",namespace=~".*"} > 0 unless on(namespace, persistentvolumeclaim) kube_persistentvolumeclaim_access_mode{access_mode="ReadOnlyMany"} == 1 unless on(namespace, persistentvolumeclaim) kube_persistentvolumeclaim_labels{label_excluded_from_alerts="true"} == 1
353+
expr: (kubelet_volume_stats_available_bytes{job="kubelet",namespace=~".*"} / kubelet_volume_stats_capacity_bytes{job="kubelet",namespace=~".*"}) < 0.03 and kubelet_volume_stats_used_bytes{job="kubelet",namespace=~".*"} > 0 unless on(namespace, persistentvolumeclaim) kube_persistentvolumeclaim_access_mode{access_mode="ReadOnlyMany"} == 1 unless on(namespace, persistentvolumeclaim) kube_persistentvolumeclaim_labels{label_excluded_from_alerts="true"} == 1
354354
for: 1m
355355
labels:
356356
severity: critical
357357
annotations:
358358
description: The PersistentVolume claimed by {{ $labels.persistentvolumeclaim }} in Namespace {{ $labels.namespace }} is only {{ $value | humanizePercentage }} free.
359359
summary: PersistentVolume is filling up.
360360
- alert: KubePersistentVolumeFillingUp
361-
expr: (kubelet_volume_stats_available_bytes{job="kubelet",metrics_path="/metrics",namespace=~".*"} / kubelet_volume_stats_capacity_bytes{job="kubelet",metrics_path="/metrics",namespace=~".*"}) < 0.15 and kubelet_volume_stats_used_bytes{job="kubelet",metrics_path="/metrics",namespace=~".*"} > 0 and predict_linear(kubelet_volume_stats_available_bytes{job="kubelet",metrics_path="/metrics",namespace=~".*"}[6h], 4 * 24 * 3600) < 0 unless on(namespace, persistentvolumeclaim) kube_persistentvolumeclaim_access_mode{access_mode="ReadOnlyMany"} == 1 unless on(namespace, persistentvolumeclaim) kube_persistentvolumeclaim_labels{label_excluded_from_alerts="true"} == 1
361+
expr: (kubelet_volume_stats_available_bytes{job="kubelet",namespace=~".*"} / kubelet_volume_stats_capacity_bytes{job="kubelet",namespace=~".*"}) < 0.15 and kubelet_volume_stats_used_bytes{job="kubelet",namespace=~".*"} > 0 and predict_linear(kubelet_volume_stats_available_bytes{job="kubelet",namespace=~".*"}[6h], 4 * 24 * 3600) < 0 unless on(namespace, persistentvolumeclaim) kube_persistentvolumeclaim_access_mode{access_mode="ReadOnlyMany"} == 1 unless on(namespace, persistentvolumeclaim) kube_persistentvolumeclaim_labels{label_excluded_from_alerts="true"} == 1
362362
for: 1h
363363
labels:
364364
severity: warning
365365
annotations:
366366
description: Based on recent sampling, the PersistentVolume claimed by {{ $labels.persistentvolumeclaim }} in Namespace {{ $labels.namespace }} is expected to fill up within four days.
367367
summary: PersistentVolume is filling up.
368368
- alert: KubePersistentVolumeInodesFillingUp
369-
expr: (kubelet_volume_stats_inodes_free{job="kubelet",metrics_path="/metrics",namespace=~".*"} / kubelet_volume_stats_inodes{job="kubelet",metrics_path="/metrics",namespace=~".*"}) < 0.03 and kubelet_volume_stats_inodes_used{job="kubelet",metrics_path="/metrics",namespace=~".*"} > 0 unless on(namespace, persistentvolumeclaim) kube_persistentvolumeclaim_access_mode{access_mode="ReadOnlyMany"} == 1 unless on(namespace, persistentvolumeclaim) kube_persistentvolumeclaim_labels{label_excluded_from_alerts="true"} == 1
369+
expr: (kubelet_volume_stats_inodes_free{job="kubelet",namespace=~".*"} / kubelet_volume_stats_inodes{job="kubelet",namespace=~".*"}) < 0.03 and kubelet_volume_stats_inodes_used{job="kubelet",namespace=~".*"} > 0 unless on(namespace, persistentvolumeclaim) kube_persistentvolumeclaim_access_mode{access_mode="ReadOnlyMany"} == 1 unless on(namespace, persistentvolumeclaim) kube_persistentvolumeclaim_labels{label_excluded_from_alerts="true"} == 1
370370
for: 1m
371371
labels:
372372
severity: critical
373373
annotations:
374374
description: The PersistentVolume claimed by {{ $labels.persistentvolumeclaim }} in Namespace {{ $labels.namespace }} only has {{ $value | humanizePercentage }} free inodes.
375375
summary: PersistentVolumeInodes is filling up.
376376
- alert: KubePersistentVolumeInodesFillingUp
377-
expr: (kubelet_volume_stats_inodes_free{job="kubelet",metrics_path="/metrics",namespace=~".*"} / kubelet_volume_stats_inodes{job="kubelet",metrics_path="/metrics",namespace=~".*"}) < 0.15 and kubelet_volume_stats_inodes_used{job="kubelet",metrics_path="/metrics",namespace=~".*"} > 0 and predict_linear(kubelet_volume_stats_inodes_free{job="kubelet",metrics_path="/metrics",namespace=~".*"}[6h], 4 * 24 * 3600) < 0 unless on(namespace, persistentvolumeclaim) kube_persistentvolumeclaim_access_mode{access_mode="ReadOnlyMany"} == 1 unless on(namespace, persistentvolumeclaim) kube_persistentvolumeclaim_labels{label_excluded_from_alerts="true"} == 1
377+
expr: (kubelet_volume_stats_inodes_free{job="kubelet",namespace=~".*"} / kubelet_volume_stats_inodes{job="kubelet",namespace=~".*"}) < 0.15 and kubelet_volume_stats_inodes_used{job="kubelet",namespace=~".*"} > 0 and predict_linear(kubelet_volume_stats_inodes_free{job="kubelet",namespace=~".*"}[6h], 4 * 24 * 3600) < 0 unless on(namespace, persistentvolumeclaim) kube_persistentvolumeclaim_access_mode{access_mode="ReadOnlyMany"} == 1 unless on(namespace, persistentvolumeclaim) kube_persistentvolumeclaim_labels{label_excluded_from_alerts="true"} == 1
378378
for: 1h
379379
labels:
380380
severity: warning

0 commit comments

Comments
 (0)