Skip to content

Commit a0586a0

Browse files
committed
alerts: Improve DaemonSet rollout alert taking progress into account
1 parent ae234b7 commit a0586a0

File tree

1 file changed

+24
-4
lines changed

1 file changed

+24
-4
lines changed

alerts/apps_alerts.libsonnet

Lines changed: 24 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -150,15 +150,35 @@
150150
{
151151
alert: 'KubeDaemonSetRolloutStuck',
152152
expr: |||
153-
kube_daemonset_status_number_ready{%(prefixedNamespaceSelector)s%(kubeStateMetricsSelector)s}
154-
/
155-
kube_daemonset_status_desired_number_scheduled{%(prefixedNamespaceSelector)s%(kubeStateMetricsSelector)s} < 1.00
153+
(
154+
(
155+
kube_daemonset_status_current_number_scheduled{%(prefixedNamespaceSelector)s%(kubeStateMetricsSelector)s}
156+
!=
157+
kube_daemonset_status_desired_number_scheduled{%(prefixedNamespaceSelector)s%(kubeStateMetricsSelector)s}
158+
) or (
159+
kube_daemonset_status_number_misscheduled{%(prefixedNamespaceSelector)s%(kubeStateMetricsSelector)s}
160+
!=
161+
0
162+
) or (
163+
kube_daemonset_updated_number_scheduled{%(prefixedNamespaceSelector)s%(kubeStateMetricsSelector)s}
164+
!=
165+
kube_daemonset_status_desired_number_scheduled{%(prefixedNamespaceSelector)s%(kubeStateMetricsSelector)s}
166+
) or (
167+
kube_daemonset_status_number_available{%(prefixedNamespaceSelector)s%(kubeStateMetricsSelector)s}
168+
!=
169+
kube_daemonset_status_desired_number_scheduled{%(prefixedNamespaceSelector)s%(kubeStateMetricsSelector)s}
170+
)
171+
) and (
172+
changes(kube_daemonset_updated_number_scheduled{%(prefixedNamespaceSelector)s%(kubeStateMetricsSelector)s}[5m])
173+
==
174+
0
175+
)
156176
||| % $._config,
157177
labels: {
158178
severity: 'warning',
159179
},
160180
annotations: {
161-
message: 'Only {{ $value | humanizePercentage }} of the desired Pods of DaemonSet {{ $labels.namespace }}/{{ $labels.daemonset }} are scheduled and ready.',
181+
message: 'DaemonSet {{ $labels.namespace }}/{{ $labels.daemonset }} has not finished or progressed for at least 15 minutes.',
162182
},
163183
'for': '15m',
164184
},

0 commit comments

Comments
 (0)