Skip to content

Commit f02ad30

Browse files
drew-viles authored and ehbello committed
fix: updating the prometheus rules to remove errors
Signed-off-by: Drew Hudson-Viles <drew@hudson-viles.uk>
1 parent b569d69 commit f02ad30

File tree

4 files changed

+4
-4
lines changed

4 files changed

+4
-4
lines changed

charts/cluster/prometheus_rules/cluster-logical_replication_errors-critical.yaml

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -9,7 +9,7 @@ annotations:
99
CRITICAL: High error rate indicates persistent replication issues requiring immediate attention. This could lead to significant data inconsistency or complete replication failure. Errors include both apply errors and sync errors. The subscription may stop working if errors continue.
1010
runbook_url: https://github.com/maarlab-rethinking/cloudnative-pg-charts/blob/main/charts/cluster/docs/runbooks/CNPGClusterLogicalReplicationErrors.md
1111
expr: |
12-
label_replace(increase(max by (namespace, job, subname) (cnpg_pg_stat_subscription_apply_error_count + cnpg_pg_stat_subscription_sync_error_count)[5m]), "cluster", "$1", "job", ".+/(.+)") >= 5
12+
label_replace(max by (namespace, job, subname) (increase(cnpg_pg_stat_subscription_apply_error_count[5m]) + increase(cnpg_pg_stat_subscription_sync_error_count[5m])), "cluster", "$1", "job", ".+/(.+)") >= 5
1313
for: 0m
1414
labels:
1515
severity: critical

charts/cluster/prometheus_rules/cluster-logical_replication_errors.yaml

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -9,7 +9,7 @@ annotations:
99
This includes both apply errors (during normal replication) and sync errors (during initial table sync). Errors indicate data consistency issues that need immediate attention to prevent data divergence.
1010
runbook_url: https://github.com/maarlab-rethinking/cloudnative-pg-charts/blob/main/charts/cluster/docs/runbooks/{{ $alert }}.md
1111
expr: |
12-
label_replace(increase(max by (namespace, job, subname) (cnpg_pg_stat_subscription_apply_error_count + cnpg_pg_stat_subscription_sync_error_count)[5m]), "cluster", "$1", "job", ".+/(.+)") > 0
12+
label_replace(max by (namespace, job, subname) (increase(cnpg_pg_stat_subscription_apply_error_count[5m]) + increase(cnpg_pg_stat_subscription_sync_error_count[5m])), "cluster", "$1", "job", ".+/(.+)") > 0
1313
for: 1m
1414
labels:
1515
severity: warning

charts/cluster/prometheus_rules/cluster-logical_replication_stopped-critical.yaml

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -18,7 +18,7 @@ expr: |
1818
) or (
1919
# Subscription is enabled but stuck (no worker process with significant lag)
2020
label_replace(max by (namespace, job, subname) (cnpg_pg_stat_subscription_enabled), "cluster", "$1", "job", ".+/(.+)") == 1
21-
and label_replace(max by (namespace, job, subname) (cnpg_pg_stat_subscription_pid), "cluster", "$1", "job", ".+/(.+)") == ""
21+
and label_replace(max by (namespace, job, subname) (cnpg_pg_stat_subscription_pid), "cluster", "$1", "job", ".+/(.+)") == 0
2222
and label_replace(max by (namespace, job, subname) (cnpg_pg_stat_subscription_buffered_lag_bytes), "cluster", "$1", "job", ".+/(.+)") / 1024^3 > 0.1
2323
)
2424
for: 15m

charts/cluster/prometheus_rules/cluster-logical_replication_stopped.yaml

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -17,7 +17,7 @@ expr: |
1717
) or (
1818
# Subscription is enabled but stuck (no worker process with significant lag)
1919
label_replace(max by (namespace, job, subname) (cnpg_pg_stat_subscription_enabled), "cluster", "$1", "job", ".+/(.+)") == 1
20-
and label_replace(max by (namespace, job, subname) (cnpg_pg_stat_subscription_pid), "cluster", "$1", "job", ".+/(.+)") == ""
20+
and label_replace(max by (namespace, job, subname) (cnpg_pg_stat_subscription_pid), "cluster", "$1", "job", ".+/(.+)") == 0
2121
and label_replace(max by (namespace, job, subname) (cnpg_pg_stat_subscription_buffered_lag_bytes), "cluster", "$1", "job", ".+/(.+)") / 1024^3 > 0.1
2222
)
2323
for: 5m

0 commit comments

Comments
 (0)