Skip to content

Commit 8ce41cd

Browse files
authored
fix: Ensure job label present in Down alerts (#1120)
1 parent 7f553ec commit 8ce41cd

File tree

4 files changed: +112 -4 lines changed

4 files changed: +112 -4 lines changed

Makefile

Lines changed: 10 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -20,7 +20,7 @@ OUT_DIR ?=dashboards_out
2020
all: fmt generate lint test
2121

2222
.PHONY: dev
23-
dev: generate
23+
dev: generate lint
2424
@cd scripts && ./lgtm.sh && \
2525
echo '' && \
2626
echo '╔═══════════════════════════════════════════════════════════════╗' && \
@@ -43,9 +43,10 @@ dev-port-forward:
4343
kubectl --context kind-kubernetes-mixin wait --for=condition=Ready pods -l app=lgtm --timeout=300s
4444
kubectl --context kind-kubernetes-mixin port-forward service/lgtm 3000:3000 4317:4317 4318:4318 9090:9090
4545

46-
dev-reload: generate
46+
dev-reload: generate lint
4747
@cp -v prometheus_alerts.yaml scripts/provisioning/prometheus/ && \
4848
cp -v prometheus_rules.yaml scripts/provisioning/prometheus/ && \
49+
kubectl --context kind-kubernetes-mixin apply -f scripts/lgtm.yaml && \
4950
kubectl --context kind-kubernetes-mixin rollout restart deployment/lgtm && \
5051
echo '╔═══════════════════════════════════════════════════════════════╗' && \
5152
echo '║ ║' && \
@@ -58,8 +59,14 @@ dev-reload: generate
5859
dev-down:
5960
kind delete cluster --name kubernetes-mixin
6061

62+
clean-alerts:
63+
rm -f prometheus_alerts.yaml
64+
65+
clean-rules:
66+
rm -f prometheus_rules.yaml
67+
6168
.PHONY: generate
62-
generate: prometheus_alerts.yaml prometheus_rules.yaml $(OUT_DIR)
69+
generate: clean-alerts clean-rules prometheus_alerts.yaml prometheus_rules.yaml $(OUT_DIR)
6370

6471
$(JSONNET_VENDOR): $(JB_BIN) jsonnetfile.json
6572
$(JB_BIN) install

lib/absent_alert.libsonnet

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -5,7 +5,7 @@
55

66
alert: '%sDown' % absentAlert.componentName,
77
expr: |||
8-
absent(up{%s} == 1)
8+
absent(up{%s})
99
||| % absentAlert.selector,
1010
'for': '15m',
1111
labels: {

scripts/lgtm.yaml

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -48,6 +48,9 @@ spec:
4848
- containerPort: 4317
4949
- containerPort: 4318
5050
- containerPort: 9090
51+
env:
52+
- name: ENABLE_LOGS_ALL
53+
value: "true"
5154
readinessProbe:
5255
exec:
5356
command:

tests/absent_alert-test.yaml

Lines changed: 98 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,98 @@
1+
rule_files:
2+
- ../prometheus_alerts.yaml
3+
4+
tests:
5+
- interval: 1m
6+
name: KubeAPIDown fires when kube-apiserver target is absent
7+
input_series:
8+
- series: 'up{job="kube-apiserver", instance="apiserver1"}'
9+
values: '1 1 1 1 1 0 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _'
10+
alert_rule_test:
11+
- eval_time: 10m
12+
alertname: KubeAPIDown
13+
- eval_time: 25m
14+
alertname: KubeAPIDown
15+
exp_alerts:
16+
- exp_labels:
17+
severity: "critical"
18+
job: "kube-apiserver"
19+
exp_annotations:
20+
description: "KubeAPI has disappeared from Prometheus target discovery."
21+
summary: "Target disappeared from Prometheus target discovery."
22+
runbook_url: "https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeapidown"
23+
24+
- interval: 1m
25+
name: KubeletDown fires when kubelet target is absent
26+
input_series:
27+
- series: 'up{job="kubelet", instance="node1"}'
28+
values: '1 1 1 1 1 0 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _'
29+
alert_rule_test:
30+
- eval_time: 10m
31+
alertname: KubeletDown
32+
- eval_time: 25m
33+
alertname: KubeletDown
34+
exp_alerts:
35+
- exp_labels:
36+
severity: "critical"
37+
job: "kubelet"
38+
exp_annotations:
39+
description: "Kubelet has disappeared from Prometheus target discovery."
40+
summary: "Target disappeared from Prometheus target discovery."
41+
runbook_url: "https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeletdown"
42+
43+
- interval: 1m
44+
name: KubeSchedulerDown fires when kube-scheduler target is absent
45+
input_series:
46+
- series: 'up{job="kube-scheduler", instance="scheduler1"}'
47+
values: '1 1 1 1 1 0 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _'
48+
alert_rule_test:
49+
- eval_time: 10m
50+
alertname: KubeSchedulerDown
51+
- eval_time: 25m
52+
alertname: KubeSchedulerDown
53+
exp_alerts:
54+
- exp_labels:
55+
severity: "critical"
56+
job: "kube-scheduler"
57+
exp_annotations:
58+
description: "KubeScheduler has disappeared from Prometheus target discovery."
59+
summary: "Target disappeared from Prometheus target discovery."
60+
runbook_url: "https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeschedulerdown"
61+
62+
- interval: 1m
63+
name: KubeControllerManagerDown fires when kube-controller-manager target is absent
64+
input_series:
65+
- series: 'up{job="kube-controller-manager", instance="controller1"}'
66+
values: '1 1 1 1 1 0 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _'
67+
alert_rule_test:
68+
- eval_time: 10m
69+
alertname: KubeControllerManagerDown
70+
- eval_time: 25m
71+
alertname: KubeControllerManagerDown
72+
exp_alerts:
73+
- exp_labels:
74+
severity: "critical"
75+
job: "kube-controller-manager"
76+
exp_annotations:
77+
description: "KubeControllerManager has disappeared from Prometheus target discovery."
78+
summary: "Target disappeared from Prometheus target discovery."
79+
runbook_url: "https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubecontrollermanagerdown"
80+
81+
- interval: 1m
82+
name: KubeProxyDown fires when kube-proxy target is absent
83+
input_series:
84+
- series: 'up{job="kube-proxy", instance="proxy1"}'
85+
values: '1 1 1 1 1 0 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _'
86+
alert_rule_test:
87+
- eval_time: 10m
88+
alertname: KubeProxyDown
89+
- eval_time: 25m
90+
alertname: KubeProxyDown
91+
exp_alerts:
92+
- exp_labels:
93+
severity: "critical"
94+
job: "kube-proxy"
95+
exp_annotations:
96+
description: "KubeProxy has disappeared from Prometheus target discovery."
97+
summary: "Target disappeared from Prometheus target discovery."
98+
runbook_url: "https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeproxydown"

0 commit comments

Comments (0)