Skip to content

Commit 9eda302

Browse files
author
Matt Pryor
authored
Add dashboard and alerts for Velero backups (#681)
1 parent f0a7bab commit 9eda302

File tree

3 files changed: +1198 -0 lines changed

roles/velero/defaults/main.yml

Lines changed: 28 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -94,6 +94,34 @@ velero_release_namespace: velero
9494
velero_release_name: velero
9595
velero_wait_timeout: 10m
9696
velero_release_defaults:
97+
metrics:
98+
enabled: true
99+
serviceMonitor:
100+
enabled: true
101+
prometheusRule:
102+
enabled: true
103+
spec:
104+
- alert: VeleroBackupPartialFailures
  # Matches any named schedule whose partial-failure counter has increased
  # over the trailing 24h window; the condition must hold for 15 minutes.
  # The folded scalar below joins into a single-line PromQL expression.
  expr: >-
    sum(increase(velero_backup_partial_failure_total{schedule!=""}[24h]))
    by(schedule) > 0
  for: 15m
  labels:
    severity: warning
  annotations:
    # The !unsafe tag stops Ansible from rendering the Prometheus label
    # placeholder in this message as one of its own templates.
    message: !unsafe >-
      Velero schedule '{{ $labels.schedule }}' has partially failed backups
      in the last 24 hours.
114+
115+
- alert: VeleroBackupFailures
  # Matches any named schedule whose failure counter has increased over the
  # trailing 24h window; the condition must hold for 15 minutes.
  # The folded scalar below joins into a single-line PromQL expression.
  expr: >-
    sum(increase(velero_backup_failure_total{schedule!=""}[24h]))
    by(schedule) > 0
  for: 15m
  labels:
    severity: warning
  annotations:
    # The !unsafe tag stops Ansible from rendering the Prometheus label
    # placeholder in this message as one of its own templates.
    message: !unsafe >-
      Velero schedule '{{ $labels.schedule }}' has failed backups
      in the last 24 hours.
97125
configuration:
98126
features: EnableCSI
99127
backupStorageLocation:

0 commit comments

Comments
 (0)