
Commit 0821ada

Merge pull request #1425 from raptorsun/bugfix/rule_test
2 parents 10c11d5 + 1e632d2 commit 0821ada

17 files changed: +10070 and -3236 lines

manifests/alertmanager-prometheusRule.yaml

Lines changed: 31 additions & 13 deletions
@@ -16,7 +16,8 @@ spec:
     rules:
     - alert: AlertmanagerFailedReload
       annotations:
-        description: Configuration has failed to load for {{ $labels.namespace }}/{{ $labels.pod}}.
+        description: Configuration has failed to load for {{ $labels.namespace }}/{{
+          $labels.pod}}.
         runbook_url: https://runbooks.prometheus-operator.dev/runbooks/alertmanager/alertmanagerfailedreload
         summary: Reloading an Alertmanager configuration has failed.
       expr: |
@@ -28,9 +29,11 @@ spec:
         severity: critical
     - alert: AlertmanagerMembersInconsistent
       annotations:
-        description: Alertmanager {{ $labels.namespace }}/{{ $labels.pod}} has only found {{ $value }} members of the {{$labels.job}} cluster.
+        description: Alertmanager {{ $labels.namespace }}/{{ $labels.pod}} has only
+          found {{ $value }} members of the {{$labels.job}} cluster.
         runbook_url: https://runbooks.prometheus-operator.dev/runbooks/alertmanager/alertmanagermembersinconsistent
-        summary: A member of an Alertmanager cluster has not found all other cluster members.
+        summary: A member of an Alertmanager cluster has not found all other cluster
+          members.
       expr: |
         # Without max_over_time, failed scrapes could create false negatives, see
         # https://www.robustperception.io/alerting-on-gauges-in-prometheus-2-0 for details.
@@ -42,7 +45,9 @@ spec:
         severity: critical
     - alert: AlertmanagerFailedToSendAlerts
       annotations:
-        description: Alertmanager {{ $labels.namespace }}/{{ $labels.pod}} failed to send {{ $value | humanizePercentage }} of notifications to {{ $labels.integration }}.
+        description: Alertmanager {{ $labels.namespace }}/{{ $labels.pod}} failed
+          to send {{ $value | humanizePercentage }} of notifications to {{ $labels.integration
+          }}.
         runbook_url: https://runbooks.prometheus-operator.dev/runbooks/alertmanager/alertmanagerfailedtosendalerts
         summary: An Alertmanager instance failed to send notifications.
       expr: |
@@ -57,9 +62,12 @@ spec:
         severity: warning
     - alert: AlertmanagerClusterFailedToSendAlerts
       annotations:
-        description: The minimum notification failure rate to {{ $labels.integration }} sent from any instance in the {{$labels.job}} cluster is {{ $value | humanizePercentage }}.
+        description: The minimum notification failure rate to {{ $labels.integration
+          }} sent from any instance in the {{$labels.job}} cluster is {{ $value |
+          humanizePercentage }}.
         runbook_url: https://runbooks.prometheus-operator.dev/runbooks/alertmanager/alertmanagerclusterfailedtosendalerts
-        summary: All Alertmanager instances in a cluster failed to send notifications to a critical integration.
+        summary: All Alertmanager instances in a cluster failed to send notifications
+          to a critical integration.
       expr: |
         min by (namespace,service, integration) (
           rate(alertmanager_notifications_failed_total{job="alertmanager-main",namespace="monitoring", integration=~`.*`}[5m])
@@ -72,9 +80,12 @@ spec:
         severity: critical
     - alert: AlertmanagerClusterFailedToSendAlerts
       annotations:
-        description: The minimum notification failure rate to {{ $labels.integration }} sent from any instance in the {{$labels.job}} cluster is {{ $value | humanizePercentage }}.
+        description: The minimum notification failure rate to {{ $labels.integration
+          }} sent from any instance in the {{$labels.job}} cluster is {{ $value |
+          humanizePercentage }}.
         runbook_url: https://runbooks.prometheus-operator.dev/runbooks/alertmanager/alertmanagerclusterfailedtosendalerts
-        summary: All Alertmanager instances in a cluster failed to send notifications to a non-critical integration.
+        summary: All Alertmanager instances in a cluster failed to send notifications
+          to a non-critical integration.
       expr: |
         min by (namespace,service, integration) (
           rate(alertmanager_notifications_failed_total{job="alertmanager-main",namespace="monitoring", integration!~`.*`}[5m])
@@ -87,7 +98,8 @@ spec:
         severity: warning
     - alert: AlertmanagerConfigInconsistent
       annotations:
-        description: Alertmanager instances within the {{$labels.job}} cluster have different configurations.
+        description: Alertmanager instances within the {{$labels.job}} cluster have
+          different configurations.
         runbook_url: https://runbooks.prometheus-operator.dev/runbooks/alertmanager/alertmanagerconfiginconsistent
         summary: Alertmanager instances within the same cluster have different configurations.
       expr: |
@@ -100,9 +112,12 @@ spec:
         severity: critical
     - alert: AlertmanagerClusterDown
      annotations:
-        description: '{{ $value | humanizePercentage }} of Alertmanager instances within the {{$labels.job}} cluster have been up for less than half of the last 5m.'
+        description: '{{ $value | humanizePercentage }} of Alertmanager instances
+          within the {{$labels.job}} cluster have been up for less than half of the
+          last 5m.'
         runbook_url: https://runbooks.prometheus-operator.dev/runbooks/alertmanager/alertmanagerclusterdown
-        summary: Half or more of the Alertmanager instances within the same cluster are down.
+        summary: Half or more of the Alertmanager instances within the same cluster
+          are down.
       expr: |
         (
           count by (namespace,service) (
@@ -119,9 +134,12 @@ spec:
         severity: critical
     - alert: AlertmanagerClusterCrashlooping
       annotations:
-        description: '{{ $value | humanizePercentage }} of Alertmanager instances within the {{$labels.job}} cluster have restarted at least 5 times in the last 10m.'
+        description: '{{ $value | humanizePercentage }} of Alertmanager instances
+          within the {{$labels.job}} cluster have restarted at least 5 times in the
+          last 10m.'
         runbook_url: https://runbooks.prometheus-operator.dev/runbooks/alertmanager/alertmanagerclustercrashlooping
-        summary: Half or more of the Alertmanager instances within the same cluster are crashlooping.
+        summary: Half or more of the Alertmanager instances within the same cluster
+          are crashlooping.
       expr: |
         (
           count by (namespace,service) (
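
The description and summary rewraps above are purely cosmetic: YAML folds the inserted line break in a wrapped plain scalar back into a single space when the manifest is parsed, so the rendered annotation text is unchanged. A minimal sketch of that equivalence, assuming PyYAML is installed (the snippet is illustrative and not part of this repository):

# Sketch: show that the re-wrapped plain scalar parses to the same string.
# Assumes PyYAML (pip install pyyaml).
import yaml

old = yaml.safe_load(
    "description: Configuration has failed to load for "
    "{{ $labels.namespace }}/{{ $labels.pod}}."
)

new = yaml.safe_load(
    "description: Configuration has failed to load for {{ $labels.namespace }}/{{\n"
    "  $labels.pod}}.\n"
)

# YAML line folding turns the wrapped line break into a single space,
# so both forms yield the identical annotation text.
assert old == new
print(new["description"])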

manifests/kube-prometheus-prometheusRule.yaml

Lines changed: 15 additions & 7 deletions
@@ -15,10 +15,12 @@ spec:
     rules:
     - alert: TargetDown
       annotations:
-        description: '{{ printf "%.4g" $value }}% of the {{ $labels.job }}/{{ $labels.service }} targets in {{ $labels.namespace }} namespace are down.'
+        description: '{{ printf "%.4g" $value }}% of the {{ $labels.job }}/{{ $labels.service
+          }} targets in {{ $labels.namespace }} namespace are down.'
         runbook_url: https://runbooks.prometheus-operator.dev/runbooks/general/targetdown
         summary: One or more targets are unreachable.
-      expr: 100 * (count(up == 0) BY (job, namespace, service) / count(up) BY (job, namespace, service)) > 10
+      expr: 100 * (count(up == 0) BY (job, namespace, service) / count(up) BY (job,
+        namespace, service)) > 10
       for: 10m
       labels:
         severity: warning
@@ -31,15 +33,17 @@ spec:
           mechanisms that send a notification when this alert is not firing. For example the
           "DeadMansSnitch" integration in PagerDuty.
         runbook_url: https://runbooks.prometheus-operator.dev/runbooks/general/watchdog
-        summary: An alert that should always be firing to certify that Alertmanager is working properly.
+        summary: An alert that should always be firing to certify that Alertmanager
+          is working properly.
       expr: vector(1)
       labels:
         severity: none
   - name: node-network
     rules:
     - alert: NodeNetworkInterfaceFlapping
       annotations:
-        description: Network interface "{{ $labels.device }}" changing its up status often on node-exporter {{ $labels.namespace }}/{{ $labels.pod }}
+        description: Network interface "{{ $labels.device }}" changing its up status
+          often on node-exporter {{ $labels.namespace }}/{{ $labels.pod }}
         runbook_url: https://runbooks.prometheus-operator.dev/runbooks/general/nodenetworkinterfaceflapping
         summary: Network interface is often changing its status
       expr: |
@@ -49,17 +53,21 @@ spec:
         severity: warning
   - name: kube-prometheus-node-recording.rules
     rules:
-    - expr: sum(rate(node_cpu_seconds_total{mode!="idle",mode!="iowait",mode!="steal"}[3m])) BY (instance)
+    - expr: sum(rate(node_cpu_seconds_total{mode!="idle",mode!="iowait",mode!="steal"}[3m]))
+        BY (instance)
       record: instance:node_cpu:rate:sum
     - expr: sum(rate(node_network_receive_bytes_total[3m])) BY (instance)
       record: instance:node_network_receive_bytes:rate:sum
     - expr: sum(rate(node_network_transmit_bytes_total[3m])) BY (instance)
       record: instance:node_network_transmit_bytes:rate:sum
-    - expr: sum(rate(node_cpu_seconds_total{mode!="idle",mode!="iowait",mode!="steal"}[5m])) WITHOUT (cpu, mode) / ON(instance) GROUP_LEFT() count(sum(node_cpu_seconds_total) BY (instance, cpu)) BY (instance)
+    - expr: sum(rate(node_cpu_seconds_total{mode!="idle",mode!="iowait",mode!="steal"}[5m]))
+        WITHOUT (cpu, mode) / ON(instance) GROUP_LEFT() count(sum(node_cpu_seconds_total)
+        BY (instance, cpu)) BY (instance)
       record: instance:node_cpu:ratio
     - expr: sum(rate(node_cpu_seconds_total{mode!="idle",mode!="iowait",mode!="steal"}[5m]))
       record: cluster:node_cpu:sum_rate5m
-    - expr: cluster:node_cpu_seconds_total:rate5m / count(sum(node_cpu_seconds_total) BY (instance, cpu))
+    - expr: cluster:node_cpu_seconds_total:rate5m / count(sum(node_cpu_seconds_total)
+        BY (instance, cpu))
       record: cluster:node_cpu:ratio
   - name: kube-prometheus-general.rules
     rules:
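
Because this change only rewraps long scalars, the quickest way to review the recording rules is to load the manifest and collapse each expression back onto a single line. A rough sketch, assuming PyYAML and a local checkout of the repository (the script and its output format are illustrative only, not part of this commit):

# Sketch: flatten the (possibly wrapped) expressions in a PrometheusRule
# manifest back into single-line PromQL for easier review.
# Assumes PyYAML and a checkout containing the manifest below.
import yaml

with open("manifests/kube-prometheus-prometheusRule.yaml") as f:
    manifest = yaml.safe_load(f)

for group in manifest["spec"]["groups"]:
    for rule in group["rules"]:
        name = rule.get("record") or rule.get("alert")
        # Block-scalar exprs keep their newlines; collapse all whitespace runs.
        expr = " ".join(rule["expr"].split())
        print(f"{group['name']}: {name}\n  {expr}\n")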

manifests/kube-state-metrics-prometheusRule.yaml

Lines changed: 11 additions & 4 deletions
@@ -16,7 +16,9 @@ spec:
     rules:
     - alert: KubeStateMetricsListErrors
       annotations:
-        description: kube-state-metrics is experiencing errors at an elevated rate in list operations. This is likely causing it to not be able to expose metrics about Kubernetes objects correctly or at all.
+        description: kube-state-metrics is experiencing errors at an elevated rate
+          in list operations. This is likely causing it to not be able to expose metrics
+          about Kubernetes objects correctly or at all.
         runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kube-state-metrics/kubestatemetricslisterrors
         summary: kube-state-metrics is experiencing errors in list operations.
       expr: |
@@ -29,7 +31,9 @@ spec:
         severity: critical
     - alert: KubeStateMetricsWatchErrors
       annotations:
-        description: kube-state-metrics is experiencing errors at an elevated rate in watch operations. This is likely causing it to not be able to expose metrics about Kubernetes objects correctly or at all.
+        description: kube-state-metrics is experiencing errors at an elevated rate
+          in watch operations. This is likely causing it to not be able to expose
+          metrics about Kubernetes objects correctly or at all.
         runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kube-state-metrics/kubestatemetricswatcherrors
         summary: kube-state-metrics is experiencing errors in watch operations.
       expr: |
@@ -42,7 +46,9 @@ spec:
         severity: critical
     - alert: KubeStateMetricsShardingMismatch
       annotations:
-        description: kube-state-metrics pods are running with different --total-shards configuration, some Kubernetes objects may be exposed multiple times or not exposed at all.
+        description: kube-state-metrics pods are running with different --total-shards
+          configuration, some Kubernetes objects may be exposed multiple times or
+          not exposed at all.
         runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kube-state-metrics/kubestatemetricsshardingmismatch
         summary: kube-state-metrics sharding is misconfigured.
       expr: |
@@ -52,7 +58,8 @@ spec:
         severity: critical
     - alert: KubeStateMetricsShardsMissing
       annotations:
-        description: kube-state-metrics shards are missing, some Kubernetes objects are not being exposed.
+        description: kube-state-metrics shards are missing, some Kubernetes objects
+          are not being exposed.
         runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kube-state-metrics/kubestatemetricsshardsmissing
         summary: kube-state-metrics shards are missing.
       expr: |
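
A rewrapped manifest should still pass Prometheus' own rule validation. One way to check that locally is sketched below, assuming promtool is on PATH and PyYAML is available; this is only an illustration, not the rule test referenced by the source branch name:

# Sketch: extract spec.groups from a PrometheusRule manifest and run
# `promtool check rules` against it. Assumes PyYAML and promtool;
# this is illustrative and not the repository's test harness.
import subprocess
import tempfile

import yaml

MANIFEST = "manifests/kube-state-metrics-prometheusRule.yaml"

with open(MANIFEST) as f:
    manifest = yaml.safe_load(f)

# promtool expects a plain Prometheus rule file: a top-level `groups:` list.
with tempfile.NamedTemporaryFile("w", suffix=".yaml", delete=False) as tmp:
    yaml.safe_dump({"groups": manifest["spec"]["groups"]}, tmp)
    path = tmp.name

subprocess.run(["promtool", "check", "rules", path], check=True)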
