---
# Prometheus alerting rules built on cnpg_* metrics (presumably exported by
# CloudNativePG instances -- confirm against the deployed exporter).
# Every rule below must hold for 1m before firing and carries
# the label severity: warning.
groups:
  - name: cnp-default.rules
    rules:
      # Fires when cnpg_backends_max_tx_duration_seconds exceeds 300.
      - alert: LongRunningTransaction
        annotations:
          description: Pod {{ $labels.pod }} is taking more than 5 minutes (300 seconds) for a query.
          summary: A query is taking longer than 5 minutes.
        expr: |-
          cnpg_backends_max_tx_duration_seconds > 300
        for: 1m
        labels:
          severity: warning

      # Fires when cnpg_backends_waiting_total exceeds 300.
      # The annotations previously described a wait *duration* of 5 minutes,
      # but the expression compares a backend *count* with 300; the text now
      # states what the expression actually checks.
      # NOTE(review): 300 waiting backends is a high bar -- confirm the
      # threshold is intended and was not carried over from a seconds value.
      - alert: BackendsWaiting
        annotations:
          description: Pod {{ $labels.pod }} has more than 300 backends waiting
          summary: More than 300 backends are waiting
        expr: |-
          cnpg_backends_waiting_total > 300
        for: 1m
        labels:
          severity: warning

      # Fires when cnpg_pg_database_xid_age exceeds 300,000,000.
      - alert: PGDatabase
        annotations:
          description: Over 300,000,000 transactions from frozen xid on pod {{ $labels.pod }}
          summary: Number of transactions from the frozen XID to the current one
        expr: |-
          cnpg_pg_database_xid_age > 300000000
        for: 1m
        labels:
          severity: warning

      # Fires when cnpg_pg_replication_lag exceeds 300.
      - alert: PGReplication
        annotations:
          description: Standby is lagging behind by over 300 seconds (5 minutes)
          summary: The standby is lagging behind the primary
        expr: |-
          cnpg_pg_replication_lag > 300
        for: 1m
        labels:
          severity: warning

      # Fires when the last-failed archive time is more than 1 greater than
      # the last-archived time, i.e. the most recent failure is newer than the
      # most recent success (unit presumably seconds -- confirm).
      - alert: LastFailedArchiveTime
        annotations:
          description: Archiving failed for {{ $labels.pod }}
          summary: Checks the last time archiving failed. Will be < 0 when it has not failed.
        expr: |-
          (cnpg_pg_stat_archiver_last_failed_time - cnpg_pg_stat_archiver_last_archived_time) > 1
        for: 1m
        labels:
          severity: warning

      # Fires when cnpg_pg_stat_database_deadlocks exceeds 10.
      # NOTE(review): this compares the raw metric value. If the metric is a
      # cumulative counter, the alert stays active once the total passes 10;
      # consider alerting on increase(...[window]) instead -- confirm the
      # metric type and pick a window before changing the expression.
      - alert: DatabaseDeadlockConflicts
        annotations:
          description: There are over 10 deadlock conflicts in {{ $labels.pod }}
          summary: Checks the number of database conflicts
        expr: |-
          cnpg_pg_stat_database_deadlocks > 10
        for: 1m
        labels:
          severity: warning

      # Fires when cnpg_pg_replication_in_recovery is greater than
      # cnpg_pg_replication_is_wal_receiver_up (presumably 1 vs 0: the
      # instance is in recovery while its WAL receiver is down -- confirm).
      - alert: ReplicaFailingReplication
        annotations:
          description: Replica {{ $labels.pod }} is failing to replicate
          summary: Checks if the replica is failing to replicate
        expr: |-
          cnpg_pg_replication_in_recovery > cnpg_pg_replication_is_wal_receiver_up
        for: 1m
        labels:
          severity: warning