Skip to content

Commit fd04e3a

Browse files
authored
Remove prometheus-operator alerts (#299)
Signed-off-by: ArthurSens <[email protected]>
1 parent 40e0dd9 commit fd04e3a

File tree

2 files changed: 12 additions (+12) and 84 deletions (-84)

lib/alert-filter.libsonnet

Lines changed: 10 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -67,6 +67,16 @@ local unwatedAlerts = [
6767
'KubeletServerCertificateRenewalErrors',
6868
'KubeletDown', // Re-added to platform-mixin
6969
'KubeClientErrors',
70+
71+
// From prometheus-operator
72+
'PrometheusOperatorListErrors', // Re-added to platform-mixin
73+
'PrometheusOperatorWatchErrors', // Re-added to platform-mixin
74+
'PrometheusOperatorReconcileErrors', // Re-added to platform-mixin
75+
'PrometheusOperatorNodeLookupErrors',
76+
'PrometheusOperatorNotReady',
77+
'ConfigReloaderSidecarErrors', // Re-added to platform-mixin
78+
'PrometheusOperatorRejectedResources',
79+
'PrometheusOperatorSyncFailed',
7080
];
7181

7282
{

monitoring-satellite/manifests/kube-prometheus-rules/rules.yaml

Lines changed: 2 additions & 84 deletions
Original file line number | Diff line number | Diff line change
@@ -1448,91 +1448,9 @@ spec:
14481448
labels:
14491449
severity: critical
14501450
- name: prometheus-operator
1451-
rules:
1452-
- alert: PrometheusOperatorListErrors
1453-
annotations:
1454-
description: Errors while performing List operations in controller {{$labels.controller}} in {{$labels.namespace}} namespace.
1455-
runbook_url: https://github.com/gitpod-io/runbooks/blob/main/runbooks/PrometheusOperatorListErrors.md
1456-
summary: Errors while performing list operations in controller.
1457-
expr: |
1458-
(sum by (controller,namespace) (rate(prometheus_operator_list_operations_failed_total{job="prometheus-operator",namespace="monitoring-satellite"}[10m])) / sum by (controller,namespace) (rate(prometheus_operator_list_operations_total{job="prometheus-operator",namespace="monitoring-satellite"}[10m]))) > 0.4
1459-
for: 15m
1460-
labels:
1461-
severity: warning
1462-
- alert: PrometheusOperatorWatchErrors
1463-
annotations:
1464-
description: Errors while performing watch operations in controller {{$labels.controller}} in {{$labels.namespace}} namespace.
1465-
runbook_url: https://github.com/gitpod-io/runbooks/blob/main/runbooks/PrometheusOperatorWatchErrors.md
1466-
summary: Errors while performing watch operations in controller.
1467-
expr: |
1468-
(sum by (controller,namespace) (rate(prometheus_operator_watch_operations_failed_total{job="prometheus-operator",namespace="monitoring-satellite"}[5m])) / sum by (controller,namespace) (rate(prometheus_operator_watch_operations_total{job="prometheus-operator",namespace="monitoring-satellite"}[5m]))) > 0.4
1469-
for: 15m
1470-
labels:
1471-
severity: warning
1472-
- alert: PrometheusOperatorSyncFailed
1473-
annotations:
1474-
description: Controller {{ $labels.controller }} in {{ $labels.namespace }} namespace fails to reconcile {{ $value }} objects.
1475-
runbook_url: https://github.com/gitpod-io/runbooks/blob/main/runbooks/PrometheusOperatorSyncFailed.md
1476-
summary: Last controller reconciliation failed
1477-
expr: |
1478-
min_over_time(prometheus_operator_syncs{status="failed",job="prometheus-operator",namespace="monitoring-satellite"}[5m]) > 0
1479-
for: 10m
1480-
labels:
1481-
severity: warning
1482-
- alert: PrometheusOperatorReconcileErrors
1483-
annotations:
1484-
description: '{{ $value | humanizePercentage }} of reconciling operations failed for {{ $labels.controller }} controller in {{ $labels.namespace }} namespace.'
1485-
runbook_url: https://github.com/gitpod-io/runbooks/blob/main/runbooks/PrometheusOperatorReconcileErrors.md
1486-
summary: Errors while reconciling controller.
1487-
expr: |
1488-
(sum by (controller,namespace) (rate(prometheus_operator_reconcile_errors_total{job="prometheus-operator",namespace="monitoring-satellite"}[5m]))) / (sum by (controller,namespace) (rate(prometheus_operator_reconcile_operations_total{job="prometheus-operator",namespace="monitoring-satellite"}[5m]))) > 0.1
1489-
for: 10m
1490-
labels:
1491-
severity: warning
1492-
- alert: PrometheusOperatorNodeLookupErrors
1493-
annotations:
1494-
description: Errors while reconciling Prometheus in {{ $labels.namespace }} Namespace.
1495-
runbook_url: https://github.com/gitpod-io/runbooks/blob/main/runbooks/PrometheusOperatorNodeLookupErrors.md
1496-
summary: Errors while reconciling Prometheus.
1497-
expr: |
1498-
rate(prometheus_operator_node_address_lookup_errors_total{job="prometheus-operator",namespace="monitoring-satellite"}[5m]) > 0.1
1499-
for: 10m
1500-
labels:
1501-
severity: warning
1502-
- alert: PrometheusOperatorNotReady
1503-
annotations:
1504-
description: Prometheus operator in {{ $labels.namespace }} namespace isn't ready to reconcile {{ $labels.controller }} resources.
1505-
runbook_url: https://github.com/gitpod-io/runbooks/blob/main/runbooks/PrometheusOperatorNotReady.md
1506-
summary: Prometheus operator not ready
1507-
expr: |
1508-
min by (controller,namespace) (max_over_time(prometheus_operator_ready{job="prometheus-operator",namespace="monitoring-satellite"}[5m]) == 0)
1509-
for: 5m
1510-
labels:
1511-
severity: warning
1512-
- alert: PrometheusOperatorRejectedResources
1513-
annotations:
1514-
description: Prometheus operator in {{ $labels.namespace }} namespace rejected {{ printf "%0.0f" $value }} {{ $labels.controller }}/{{ $labels.resource }} resources.
1515-
runbook_url: https://github.com/gitpod-io/runbooks/blob/main/runbooks/PrometheusOperatorRejectedResources.md
1516-
summary: Resources rejected by Prometheus operator
1517-
expr: |
1518-
min_over_time(prometheus_operator_managed_resources{state="rejected",job="prometheus-operator",namespace="monitoring-satellite"}[5m]) > 0
1519-
for: 5m
1520-
labels:
1521-
severity: warning
1451+
rules: []
15221452
- name: config-reloaders
1523-
rules:
1524-
- alert: ConfigReloaderSidecarErrors
1525-
annotations:
1526-
description: |-
1527-
Errors encountered while the {{$labels.pod}} config-reloader sidecar attempts to sync config in {{$labels.namespace}} namespace.
1528-
As a result, configuration for service running in {{$labels.pod}} may be stale and cannot be updated anymore.
1529-
runbook_url: https://github.com/gitpod-io/runbooks/blob/main/runbooks/ConfigReloaderSidecarErrors.md
1530-
summary: config-reloader sidecar has not had a successful reload for 10m
1531-
expr: |
1532-
max_over_time(reloader_last_reload_successful{namespace=~".+"}[5m]) == 0
1533-
for: 10m
1534-
labels:
1535-
severity: warning
1453+
rules: []
15361454
- name: cert-manager
15371455
rules: []
15381456
- name: certificates

0 commit comments

Comments (0)