Skip to content

Commit b101947

Browse files
Merge pull request #1653 from yeya24/add-CsvAbnormalReplacingOver30MinSRE-alert
Bug 1916624: Add CsvAbnormalReplacing alerts
2 parents 98f708c + 7c29485 commit b101947

File tree

2 files changed

+28
-6
lines changed

2 files changed

+28
-6
lines changed

deploy/chart/templates/0000_90_olm_01-prometheus-rule.yaml

Lines changed: 14 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -9,12 +9,23 @@ metadata:
99
role: alert-rules
1010
spec:
1111
groups:
12-
- name: olm.failing_operators.rules
12+
- name: olm.csv_abnormal.rules
1313
rules:
14-
- alert: FailingOperator
14+
- alert: CsvAbnormalFailedOver2Min
15+
expr: csv_abnormal{phase=~"^Failed$"}
16+
for: 2m
17+
labels:
18+
severity: warning
19+
namespace: "{{ "{{ $labels.namespace }}" }}"
1520
annotations:
1621
message: Failed to install Operator {{ printf "{{ $labels.name }}" }} version {{ printf "{{ $labels.version }}" }}. Reason-{{ printf "{{ $labels.reason }}" }}
17-
expr: csv_abnormal{phase="Failed"}
22+
23+
- alert: CsvAbnormalOver30Min
24+
expr: csv_abnormal{phase=~"(^Replacing$|^Pending$|^Deleting$|^Unknown$)"}
25+
for: 30m
1826
labels:
1927
severity: warning
28+
namespace: "{{ "{{ $labels.namespace }}" }}"
29+
annotations:
30+
message: Failed to install Operator {{ printf "{{ $labels.name }}" }} version {{ printf "{{ $labels.version }}" }}. Phase-{{ printf "{{ $labels.phase }}" }} Reason-{{ printf "{{ $labels.reason }}" }}
2031
{{ end }}

manifests/0000_90_olm_01-prometheus-rule.yaml

Lines changed: 14 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -11,11 +11,22 @@ metadata:
1111
include.release.openshift.io/single-node-developer: "true"
1212
spec:
1313
groups:
14-
- name: olm.failing_operators.rules
14+
- name: olm.csv_abnormal.rules
1515
rules:
16-
- alert: FailingOperator
16+
- alert: CsvAbnormalFailedOver2Min
17+
expr: csv_abnormal{phase=~"^Failed$",exported_namespace=~"(^openshift.*|^kube.*|^redhat.*|^default$)"}
18+
for: 2m
19+
labels:
20+
severity: warning
21+
namespace: "{{ $labels.namespace }}"
1722
annotations:
1823
message: Failed to install Operator {{ $labels.name }} version {{ $labels.version }}. Reason-{{ $labels.reason }}
19-
expr: csv_abnormal{phase="Failed"}
24+
25+
- alert: CsvAbnormalOver30Min
26+
expr: csv_abnormal{phase=~"(^Replacing$|^Pending$|^Deleting$|^Unknown$)",exported_namespace=~"(^openshift.*|^kube.*|^redhat.*|^default$)"}
27+
for: 30m
2028
labels:
2129
severity: warning
30+
namespace: "{{ $labels.namespace }}"
31+
annotations:
32+
message: Failed to install Operator {{ $labels.name }} version {{ $labels.version }}. Phase-{{ $labels.phase }} Reason-{{ $labels.reason }}

0 commit comments

Comments
 (0)