Skip to content

Commit 5da4c2e

Browse files
chore(mongodb): update mongodb mixin (#1541)
* chore(mongodb): update mongodb mixin - Add filteringSelector/groupLabels/instanceLabels for alerts only at the moment - Tidy up alerts - Tidy up dashboards titles (to Sentence case) - Update default colors of cluster statuses (avoid traffic light similarities) * Update alert --------- Co-authored-by: Emily <[email protected]>
1 parent b42789a commit 5da4c2e

File tree

13 files changed

+347
-309
lines changed

13 files changed

+347
-309
lines changed

mongodb-mixin/README.md

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
1-
# MongoDB Mixin
1+
# MongoDB mixin
22

3-
The MongoDB Mixin is a set of configurable, reusable, and extensible alerts and dashboards based on the metrics exported by [Percona MongoDB Exporter](https://github.com/percona/mongodb_exporter).
3+
The MongoDB mixin is a set of configurable, reusable, and extensible alerts and dashboards based on the metrics exported by [Percona MongoDB Exporter](https://github.com/percona/mongodb_exporter).
44

55
This mixin includes 3 dashboards suited for MongoDB, namely MongoDB Cluster, MongoDB Instance and MongoDB ReplicaSet.
66

Lines changed: 140 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,140 @@
1+
local xtd = import 'github.com/jsonnet-libs/xtd/main.libsonnet';
2+
{
3+
prometheusAlerts+:: {
4+
// Alert-local copy of $._config, extended with helper fields that the rule
// templates in this object interpolate through `% config`.
local config =
5+
$._config
6+
{
7+
// Group labels followed by instance labels, comma-joined for PromQL `by (...)` clauses.
agg: std.join(',', $._config.groupLabels + $._config.instanceLabels),
8+
// Group labels only, comma-joined for PromQL `by (...)` clauses.
aggCluster: std.join(',', $._config.groupLabels),
9+
// Last entry of instanceLabels; interpolated into `{{ $labels.<name> }}` in annotations.
instanceLabel: xtd.array.slice($._config.instanceLabels, -1)[0],
10+
// Last entry of groupLabels; interpolated into `{{ $labels.<name> }}` in annotations.
groupLabel: xtd.array.slice($._config.groupLabels, -1)[0],
11+
12+
},
13+
groups+: [
14+
{
15+
name: 'MongodbAlerts',
16+
rules: [
17+
{
18+
alert: 'MongodbDown',
19+
annotations: {
20+
summary: 'MongoDB instance is down.',
21+
description: 'MongoDB instance {{ $labels.%(instanceLabel)s }} is down.' % config,
22+
},
23+
expr: 'mongodb_up{%(filteringSelector)s} == 0' % config,
24+
'for': '5m',
25+
labels: {
26+
severity: 'critical',
27+
},
28+
},
29+
{
30+
alert: 'MongodbReplicaMemberUnhealthy',
31+
annotations: {
32+
description: 'MongoDB replica member is unhealthy (instance {{ $labels.%(instanceLabel)s }}).' % config,
33+
summary: 'MongoDB replica member is unhealthy.',
34+
},
35+
expr: 'mongodb_mongod_replset_member_health{%(filteringSelector)s} == 0' % config,
36+
labels: {
37+
severity: 'critical',
38+
},
39+
},
40+
{
41+
alert: 'MongodbReplicationLag',
42+
annotations: {
43+
description: 'MongoDB replication lag is more than 60s (instance {{ $labels.%(instanceLabel)s }})' % config,
44+
summary: 'MongoDB replication lag is exceeding the threshold.',
45+
},
46+
expr: 'mongodb_mongod_replset_member_replication_lag{state="SECONDARY", %(filteringSelector)s} > 60' % config,
47+
'for': '5m',
48+
labels: {
49+
severity: 'critical',
50+
},
51+
},
52+
{
53+
alert: 'MongodbReplicationHeadroom',
54+
annotations: {
55+
description: 'MongoDB replication headroom is <= 0 for {{ $labels.%(groupLabel)s }}.' % config,
56+
summary: 'MongoDB replication headroom is exceeding the threshold.',
57+
},
58+
// Headroom = oplog window minus primary/secondary optime gap, per cluster.
// Every operand is aggregated by the same %(aggCluster)s labels and uses the
// same filteringSelector: a bare avg() drops all labels, so the subtraction
// would find no matching series and the alert could never fire.
expr: '(avg by (%(aggCluster)s) (mongodb_mongod_replset_oplog_tail_timestamp{%(filteringSelector)s} - mongodb_mongod_replset_oplog_head_timestamp{%(filteringSelector)s}) - (avg by (%(aggCluster)s) (mongodb_mongod_replset_member_optime_date{state="PRIMARY",%(filteringSelector)s}) - avg by (%(aggCluster)s) (mongodb_mongod_replset_member_optime_date{state="SECONDARY",%(filteringSelector)s}))) <= 0' % config,
59+
'for': '5m',
60+
labels: {
61+
severity: 'critical',
62+
},
63+
},
64+
{
65+
alert: 'MongodbNumberCursorsOpen',
66+
annotations: {
67+
description: 'Too many cursors opened by MongoDB for clients (> 10k) on {{ $labels.%(instanceLabel)s }}.' % config,
68+
summary: 'MongoDB number of cursors open too high.',
69+
},
70+
expr: 'mongodb_mongod_metrics_cursor_open{state="total", %(filteringSelector)s} > 10 * 1000' % config,
71+
'for': '2m',
72+
labels: {
73+
severity: 'warning',
74+
},
75+
},
76+
{
77+
alert: 'MongodbCursorsTimeouts',
78+
annotations: {
79+
description: 'Too many cursors are timing out on {{ $labels.%(instanceLabel)s }}.' % config,
80+
summary: 'MongoDB cursors timeouts are exceeding the threshold.',
81+
},
82+
expr: 'increase(mongodb_mongod_metrics_cursor_timed_out_total{%(filteringSelector)s}[1m]) > 100' % config,
83+
'for': '2m',
84+
labels: {
85+
severity: 'warning',
86+
},
87+
},
88+
{
89+
alert: 'MongodbTooManyConnections',
90+
annotations: {
91+
description: 'Too many connections to MongoDB instance {{ $labels.%(instanceLabel)s }} (> 80%%).' % config,
92+
summary: 'MongoDB has too many connections.',
93+
},
94+
// Share of current connections relative to the summed mongodb_connections
// series, per instance. The denominator carries the same filteringSelector as
// the numerator so the total is not inflated by series the user filtered out.
// NOTE(review): rate() is applied to mongodb_connections, which looks like a
// gauge rather than a counter - confirm the intended numerator.
expr: 'avg by (%(agg)s) (rate(mongodb_connections{state="current",%(filteringSelector)s}[1m])) / avg by (%(agg)s) (sum (mongodb_connections{%(filteringSelector)s}) by (%(agg)s)) * 100 > 80' % config,
95+
'for': '2m',
96+
labels: {
97+
severity: 'warning',
98+
},
99+
},
100+
{
101+
alert: 'MongodbVirtualMemoryUsage',
102+
annotations: {
103+
description: 'MongoDB virtual memory usage is too high on {{ $labels.%(instanceLabel)s }}.' % config,
104+
summary: 'MongoDB high memory usage.',
105+
},
106+
expr: '(sum(mongodb_memory{type="virtual",%(filteringSelector)s}) by (%(agg)s) / sum(mongodb_memory{type="mapped",%(filteringSelector)s}) by (%(agg)s)) > 3' % config,
107+
'for': '5m',
108+
labels: {
109+
severity: 'warning',
110+
},
111+
},
112+
{
113+
alert: 'MongodbReadRequestsQueueingUp',
114+
annotations: {
115+
// Names the read queue explicitly: this rule's expr selects type="reader".
description: 'MongoDB read requests are queuing up on {{ $labels.%(instanceLabel)s }}.' % config,
116+
summary: 'MongoDB read requests are queuing up.',
117+
},
118+
expr: 'delta(mongodb_mongod_global_lock_current_queue{type="reader",%(filteringSelector)s}[1m]) > 0' % config,
119+
'for': '5m',
120+
labels: {
121+
severity: 'warning',
122+
},
123+
},
124+
{
125+
alert: 'MongodbWriteRequestsQueueingUp',
126+
annotations: {
127+
description: 'MongoDB write requests are queueing up on {{ $labels.%(instanceLabel)s }}.' % config,
128+
summary: 'MongoDB write requests are queueing up.',
129+
},
130+
expr: 'delta(mongodb_mongod_global_lock_current_queue{type="writer",%(filteringSelector)s}[1m]) > 0' % config,
131+
'for': '5m',
132+
labels: {
133+
severity: 'warning',
134+
},
135+
},
136+
],
137+
},
138+
],
139+
},
140+
}

mongodb-mixin/alerts/mongodbAlerts.yaml

Lines changed: 0 additions & 92 deletions
This file was deleted.

mongodb-mixin/config.libsonnet

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
{
2+
// Hidden (`::`) configuration object; `+` merges these defaults into any existing _config.
_config+:: {
3+
// used only in alerts
4+
// PromQL label matchers inserted verbatim inside the `{...}` of alert
// selectors; the empty default adds no extra filtering.
filteringSelector: '',
5+
// Labels used for group-level `by (...)` aggregation in alert expressions;
// the last entry is interpolated into alert descriptions.
groupLabels: ['job', 'mongodb_cluster'],
6+
// Labels appended to groupLabels for per-instance aggregation; the last
// entry is interpolated into alert descriptions.
instanceLabels: ['service_name'],
7+
},
8+
}

0 commit comments

Comments
 (0)