# Patch overview: unified diff with two hunks against
# manifests/0000_90_kube-controller-manager-operator_05_alerts.yaml.
# NOTE(review): in this view the patch is whitespace-collapsed; the original
# line breaks and YAML indentation are not visible, so confirm against the
# real file before applying.
#
# Hunk 1 (-25,7 +25,8), alert PodDisruptionBudgetAtLimit:
#   - The single-line description becomes a `|-` block scalar that names the
#     budget and namespace via $labels.poddisruptionbudget / $labels.namespace.
#   - When the "console_url" query yields a non-empty "url" label, a link to
#     the PodDisruptionBudget page under that URL is appended.
#   - summary, runbook_url and expr are unchanged context.
#
# Hunk 2 (-35,17 +36,19), alert PodDisruptionBudgetLimit:
#   - Same description treatment, and the text now prints {{ $value }} as the
#     number of healthy pods missing.
#   - expr changes from `current_healthy < desired_healthy` to
#     `(desired_healthy - current_healthy) > 0`, which is what makes $value
#     the shortfall that the new description reports.
#
# Hunk 2 (continued), alert GarbageCollectorSyncFailed:
#   - The description gains a suggested `oc logs` command built from the
#     namespace, container and pod labels, plus an optional console link to
#     the pod's logs. expr is unchanged context.
#   - NOTE(review): assumes the alerting series carries namespace/container/pod
#     labels — confirm against the scraped metric.
diff --git a/manifests/0000_90_kube-controller-manager-operator_05_alerts.yaml b/manifests/0000_90_kube-controller-manager-operator_05_alerts.yaml index 348c6bbe0..5f299b9a2 100644 --- a/manifests/0000_90_kube-controller-manager-operator_05_alerts.yaml +++ b/manifests/0000_90_kube-controller-manager-operator_05_alerts.yaml @@ -25,7 +25,8 @@ spec: - alert: PodDisruptionBudgetAtLimit annotations: summary: The pod disruption budget is preventing further disruption to pods. - description: The pod disruption budget is at the minimum disruptions allowed level. The number of current healthy pods is equal to the desired healthy pods. + description: |- + The {{ $labels.poddisruptionbudget }} pod disruption budget in the {{ $labels.namespace }} namespace is at the maximum allowed disruption. The number of current healthy pods is equal to the desired healthy pods.{{ with $console_url := "console_url" | query }}{{ if ne (len (label "url" (first $console_url))) 0}} For more information refer to {{ label "url" (first $console_url) }}/k8s/ns/{{ $labels.namespace }}/poddisruptionbudgets/{{ $labels.poddisruptionbudget }}{{ end }}{{ end }} runbook_url: https://github.com/openshift/runbooks/blob/master/alerts/cluster-kube-controller-manager-operator/PodDisruptionBudgetAtLimit.md expr: | max by(namespace, poddisruptionbudget) (kube_poddisruptionbudget_status_current_healthy == kube_poddisruptionbudget_status_desired_healthy and on (namespace, poddisruptionbudget) kube_poddisruptionbudget_status_expected_pods > 0) @@ -35,17 +36,19 @@ spec: - alert: PodDisruptionBudgetLimit annotations: summary: The pod disruption budget registers insufficient amount of pods. - description: The pod disruption budget is below the minimum disruptions allowed level and is not satisfied. The number of current healthy pods is less than the desired healthy pods. 
+ description: |- + The {{ $labels.poddisruptionbudget }} pod disruption budget in the {{ $labels.namespace }} namespace exceeds the maximum allowed disruption and is not satisfied. The number of current healthy pods is {{ $value }} less than the desired healthy pods.{{ with $console_url := "console_url" | query }}{{ if ne (len (label "url" (first $console_url))) 0}} For more information refer to {{ label "url" (first $console_url) }}/k8s/ns/{{ $labels.namespace }}/poddisruptionbudgets/{{ $labels.poddisruptionbudget }}{{ end }}{{ end }} runbook_url: https://github.com/openshift/runbooks/blob/master/alerts/cluster-kube-controller-manager-operator/PodDisruptionBudgetLimit.md expr: | - max by (namespace, poddisruptionbudget) (kube_poddisruptionbudget_status_current_healthy < kube_poddisruptionbudget_status_desired_healthy) + max by (namespace, poddisruptionbudget) (kube_poddisruptionbudget_status_desired_healthy - kube_poddisruptionbudget_status_current_healthy) > 0 for: 15m labels: severity: critical - alert: GarbageCollectorSyncFailed annotations: summary: There was a problem with syncing the resources for garbage collection. - description: Garbage Collector had a problem with syncing and monitoring the available resources. Please see KubeControllerManager logs for more details. + description: |- + Garbage Collector had a problem with syncing and monitoring the available resources. Please see KubeControllerManager logs for more details: 'oc -n {{ $labels.namespace }} logs -c {{ $labels.container }} {{ $labels.pod }}'{{ with $console_url := "console_url" | query }}{{ if ne (len (label "url" (first $console_url))) 0}} For more information refer to {{ label "url" (first $console_url) }}/k8s/ns/{{ $labels.namespace }}/pods/{{ $labels.pod }}/logs?container={{ $labels.container }} {{ end }}{{ end }}. 
runbook_url: https://github.com/openshift/runbooks/blob/master/alerts/cluster-kube-controller-manager-operator/GarbageCollectorSyncFailed.md expr: | rate(garbagecollector_controller_resources_sync_error_total{}[5m]) > 0