Skip to content

Commit 9de8339

Browse files
authored
[observability] Introduce "ReplicaUnavailable" alerts (#20344)
* [observability] ReplicaMismatch: Improve "the mismatch is 1.0" message
* [observability] Introduce "ReplicaUnavailable" alerts (as warning for now)
1 parent 97baff5 commit 9de8339

File tree

4 files changed

+48
-4
lines changed

4 files changed

+48
-4
lines changed

operations/observability/mixins/workspace/rules/central/image-builder.yaml

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -47,7 +47,18 @@ spec:
4747
annotations:
4848
runbook_url: https://github.com/gitpod-io/runbooks/blob/main/runbooks/GitpodWorkspaceDeploymentReplicaMismatch.md
4949
summary: Desired number of replicas for image-builder-mk3 are not available in cluster {{ $labels.cluster }}
50-
description: The mismatch is {{ printf "%.2f" $value }}
50+
description: 'Desired number of replicas for image-builder-mk3 are not available in cluster {{ $labels.cluster }}: {{ printf "%.2f" $value }} are missing'
5151
expr: |
5252
kube_deployment_spec_replicas{deployment="image-builder-mk3", cluster!~"ephemeral.*"} != kube_deployment_status_replicas_available{deployment="image-builder-mk3", cluster!~"ephemeral.*"}
5353
for: 3m
54+
- alert: GitpodImageBuilderMk3ReplicaUnavailable
55+
labels:
56+
# TODO(gpl): warning for now, to set it up and fine-tune it
57+
severity: warning
58+
annotations:
59+
runbook_url: https://github.com/gitpod-io/runbooks/blob/main/runbooks/GitpodWorkspaceDeploymentReplicaMismatch.md
60+
summary: image-builder-mk3 replicas are unavailable in cluster {{ $labels.cluster }}
61+
description: 'image-builder-mk3 pods are unavailable in cluster {{ $labels.cluster }}: {{ printf "%.2f" $value }}'
62+
expr: |
63+
kube_deployment_status_replicas_unavailable{deployment="image-builder-mk3", cluster!~"ephemeral.*"} > 0
64+
for: 10m

operations/observability/mixins/workspace/rules/central/node-labeler.yaml

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,18 @@ spec:
3131
annotations:
3232
runbook_url: https://github.com/gitpod-io/runbooks/blob/main/runbooks/GitpodWorkspaceDeploymentReplicaMismatch.md
3333
summary: Desired number of replicas for node-labeler are not available in cluster {{ $labels.cluster }}
34-
description: The mismatch is {{ printf "%.2f" $value }}
34+
description: 'Desired number of replicas for node-labeler are not available in cluster {{ $labels.cluster }}: {{ printf "%.2f" $value }} are missing'
3535
expr: |
3636
kube_deployment_spec_replicas{deployment="node-labeler", cluster!~"ephemeral.*"} != kube_deployment_status_replicas_available{deployment="node-labeler", cluster!~"ephemeral.*"}
3737
for: 3m
38+
- alert: GitpodNodeLabelerReplicaUnavailable
39+
labels:
40+
# TODO(gpl): warning for now, to set it up and fine-tune it
41+
severity: warning
42+
annotations:
43+
runbook_url: https://github.com/gitpod-io/runbooks/blob/main/runbooks/GitpodWorkspaceDeploymentReplicaMismatch.md
44+
summary: node-labeler replicas are unavailable in cluster {{ $labels.cluster }}
45+
description: 'node-labeler pods are unavailable in cluster {{ $labels.cluster }}: {{ printf "%.2f" $value }}'
46+
expr: |
47+
kube_deployment_status_replicas_unavailable{deployment="node-labeler", cluster!~"ephemeral.*"} > 0
48+
for: 10m

operations/observability/mixins/workspace/rules/central/ws-manager.yaml

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,18 @@ spec:
3131
annotations:
3232
runbook_url: https://github.com/gitpod-io/runbooks/blob/main/runbooks/GitpodWorkspaceDeploymentReplicaMismatch.md
3333
summary: Desired number of replicas for ws-manager-mk2 are not available in cluster {{ $labels.cluster }}
34-
description: The mismatch is {{ printf "%.2f" $value }}
34+
description: 'Desired number of replicas for ws-manager-mk2 are not available in cluster {{ $labels.cluster }}: {{ printf "%.2f" $value }} are missing'
3535
expr: |
3636
kube_deployment_spec_replicas{deployment="ws-manager-mk2", cluster!~"ephemeral.*"} != kube_deployment_status_replicas_available{deployment="ws-manager-mk2", cluster!~"ephemeral.*"}
3737
for: 3m
38+
- alert: GitpodWsManagerMk2ReplicaUnavailable
39+
labels:
40+
# TODO(gpl): warning for now, to set it up and fine-tune it
41+
severity: warning
42+
annotations:
43+
runbook_url: https://github.com/gitpod-io/runbooks/blob/main/runbooks/GitpodWorkspaceDeploymentReplicaMismatch.md
44+
summary: ws-manager-mk2 replicas are unavailable in cluster {{ $labels.cluster }}
45+
description: 'ws-manager-mk2 pods are unavailable in cluster {{ $labels.cluster }}: {{ printf "%.2f" $value }}'
46+
expr: |
47+
kube_deployment_status_replicas_unavailable{deployment="ws-manager-mk2", cluster!~"ephemeral.*"} > 0
48+
for: 10m

operations/observability/mixins/workspace/rules/central/ws-proxy.yaml

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,18 @@ spec:
3131
annotations:
3232
runbook_url: https://github.com/gitpod-io/runbooks/blob/main/runbooks/GitpodWorkspaceDeploymentReplicaMismatch.md
3333
summary: Desired number of replicas for ws-proxy are not available in cluster {{ $labels.cluster }}
34-
description: The mismatch is {{ printf "%.2f" $value }}
34+
description: 'Desired number of replicas for ws-proxy are not available in cluster {{ $labels.cluster }}: {{ printf "%.2f" $value }} are missing'
3535
expr: |
3636
kube_deployment_spec_replicas{deployment="ws-proxy", cluster!~"ephemeral.*"} != kube_deployment_status_replicas_available{deployment="ws-proxy", cluster!~"ephemeral.*"}
3737
for: 3m
38+
- alert: GitpodWsProxyMk2ReplicaUnavailable
39+
labels:
40+
# TODO(gpl): warning for now, to set it up and fine-tune it
41+
severity: warning
42+
annotations:
43+
runbook_url: https://github.com/gitpod-io/runbooks/blob/main/runbooks/GitpodWorkspaceDeploymentReplicaMismatch.md
44+
summary: ws-proxy replicas are unavailable in cluster {{ $labels.cluster }}
45+
description: 'ws-proxy pods are unavailable in cluster {{ $labels.cluster }}: {{ printf "%.2f" $value }}'
46+
expr: |
47+
kube_deployment_status_replicas_unavailable{deployment="ws-proxy", cluster!~"ephemeral.*"} > 0
48+
for: 10m

0 commit comments

Comments
 (0)