File tree Expand file tree Collapse file tree 1 file changed +20
-0
lines changed
operations/observability/mixins/workspace/rules/satellite Expand file tree Collapse file tree 1 file changed +20
-0
lines changed Original file line number Diff line number Diff line change 45
45
sum by(cluster) (avg_over_time(gitpod_workspace_regular_not_active_percentage_mk2[1m]) > 0)
46
46
AND
47
47
sum by(cluster) (rate(gitpod_ws_manager_mk2_workspace_startup_seconds_sum{type="Regular"}[1m])) == 0
48
# Error-level alert: a small number of workspace backup failures (more than 0,
# at most 16) were recorded in a cluster during the last hour. Higher counts
# are covered by GitpodWsManagerMk2BackupFailureCritical (> 16).
- alert: GitpodWsManagerMk2BackupFailureError
  labels:
    severity: error
    team: engine
  annotations:
    runbook_url: https://github.com/gitpod-io/runbooks/blob/main/runbooks/WorkspaceBackupFailures.md
    summary: Workspace backups failed recently in cluster {{ $labels.cluster }}
    description: This can happen when a single node has failed in the cloud provider
  # Clusters whose name matches "ephemeral.*" are excluded by the selector.
  # The "> 0" filter is required: increase() yields 0 for a counter that exists
  # but did not grow, and a bare "<= 16" would therefore fire permanently on
  # every healthy cluster. Chained comparisons filter left to right, so the
  # result is the set of clusters with 0 < failures <= 16.
  expr: |
    (sum by (cluster) (increase(gitpod_ws_manager_mk2_workspace_backups_failure_total{cluster!~"ephemeral.*"}[1h])) > 0) <= 16
58
# Critical-level alert: the per-cluster sum of the 1h increase of the
# workspace backup failure counter is greater than 16.
- alert: GitpodWsManagerMk2BackupFailureCritical
  # Clusters whose name matches "ephemeral.*" are excluded by the selector.
  expr: |
    sum by (cluster) (increase(gitpod_ws_manager_mk2_workspace_backups_failure_total{cluster!~"ephemeral.*"}[1h])) > 16
  labels:
    team: engine
    severity: critical
  annotations:
    summary: Workspace backups failed recently in cluster {{ $labels.cluster }}
    description: This can be an indicator of two or more nodes failing in a cloud provider
    runbook_url: https://github.com/gitpod-io/runbooks/blob/main/runbooks/WorkspaceBackupFailures.md
You can’t perform that action at this time.
0 commit comments