Skip to content

Commit 69fd473

Browse files
authored
backport(track/3.5): Look for increases in counter for missing_pods Prometheus alert rules (#312)
This PR is a backport of #310 to `track/3.5`
2 parents 2fb44ef + c8fe455 commit 69fd473

File tree

2 files changed

+6
-6
lines changed

2 files changed

+6
-6
lines changed

charms/argo-controller/src/grafana_dashboards/basic.json.tmpl

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -469,7 +469,7 @@
469469
"datasource": "${prometheusds}",
470470
"editorMode": "builder",
471471
"exemplar": false,
472-
"expr": "argo_pod_missing{juju_application=~\"$juju_application\",juju_model=~\"$juju_model\",juju_model_uuid=~\"$juju_model_uuid\",juju_unit=~\"$juju_unit\"}",
472+
"expr": "sum(increase(argo_workflows_pod_missing_total{juju_application=~\"$juju_application\",juju_model=~\"$juju_model\",juju_model_uuid=~\"$juju_model_uuid\",juju_unit=~\"$juju_unit\"}[10m])) by (node_phase)",
473473
"format": "time_series",
474474
"instant": false,
475475
"interval": "",
@@ -478,7 +478,7 @@
478478
"refId": "A"
479479
}
480480
],
481-
"title": "Workflows missing Pods",
481+
"title": "Workflows missing Pods in the past 10 minutes",
482482
"transparent": true,
483483
"type": "gauge"
484484
},
Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,11 @@
11
alert: ArgoWorkflowPodsMissing
2-
expr: max_over_time(argo_pod_missing[5m]) > 0
2+
expr: sum without (node_phase, recently_started) (increase(argo_workflows_pod_missing_total[10m])) > 0
33
for: 5m
44
labels:
55
severity: critical
66
annotations:
77
summary: "Missing workflow pods detected"
88
description: >
9-
Detected missing workflow pods in the last 5 minutes.
10-
Missing pods are expected pods that never appeared or were deleted.
11-
See https://argo-workflows.readthedocs.io/en/release-3.5/metrics/#argo_pod_missing for details.
9+
The number of missing workflow pods has increased in the last 10 minutes.
10+
This indicates that expected pods never appeared or were deleted unexpectedly.
11+
See https://argo-workflows.readthedocs.io/en/release-3.7/metrics/#pod_missing for details.

0 commit comments

Comments
 (0)