Skip to content

Commit 55d90be

Browse files
authored
Merge branch 'master' into jl-contrib-kafka
2 parents 9fb56f1 + a64f0ca commit 55d90be

File tree

16 files changed

+392
-316
lines changed

16 files changed

+392
-316
lines changed

mixin-utils/test/test_native-classic-histogram.libsonnet

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -116,6 +116,16 @@ test.new(std.thisFile)
116116
},
117117
)
118118
)
119+
+ test.case.new(
120+
name='rate of average with sum_by labels',
121+
test=test.expect.eq(
122+
actual=utils.ncHistogramAverageRate('request_duration_seconds', 'cluster="cluster1", job="job1"', sum_by=['namespace']),
123+
expected={
124+
classic: 'sum by (namespace) (rate(request_duration_seconds_sum{cluster="cluster1", job="job1"}[$__rate_interval])) /\nsum by (namespace) (rate(request_duration_seconds_count{cluster="cluster1", job="job1"}[$__rate_interval]))\n',
125+
native: 'sum by (namespace) (histogram_sum(rate(request_duration_seconds{cluster="cluster1", job="job1"}[$__rate_interval]))) /\nsum by (namespace) (histogram_count(rate(request_duration_seconds{cluster="cluster1", job="job1"}[$__rate_interval])))\n',
126+
},
127+
)
128+
)
119129
+ test.case.new(
120130
name='rate of average in recording rule with different interval, multiplier',
121131
test=test.expect.eq(
@@ -218,3 +228,13 @@ test.new(std.thisFile)
218228
},
219229
)
220230
)
231+
+ test.case.new(
232+
name='simple templating',
233+
test=test.expect.eq(
234+
actual=utils.ncHistogramApplyTemplate('label_replace(%s, "x", "$1", "y", "(.*)")', { classic: 'classic_query', native: 'native_query' }),
235+
expected={
236+
classic: 'label_replace(classic_query, "x", "$1", "y", "(.*)")',
237+
native: 'label_replace(native_query, "x", "$1", "y", "(.*)")',
238+
}
239+
)
240+
)

mixin-utils/utils.libsonnet

Lines changed: 18 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -100,24 +100,27 @@ local g = import 'grafana-builder/grafana.libsonnet';
100100
// classic histograms.
101101
// If from_recording is true, the function will assume :sum_rate metric
102102
// suffix and no rate needed.
// If sum_by is a non-empty array of label names, both sums are aggregated
// by those labels.
103-
ncHistogramAverageRate(metric, selector, rate_interval='$__rate_interval', multiplier='', from_recording=false)::
103+
ncHistogramAverageRate(metric, selector, rate_interval='$__rate_interval', multiplier='', from_recording=false, sum_by=[])::
104+
local sumBy = if std.length(sum_by) > 0 then ' by (%s) ' % std.join(', ', sum_by) else '';
104105
local multiplierStr = if multiplier == '' then '' else '%s * ' % multiplier;
105106
{
106107
classic: |||
107-
%(multiplier)ssum(%(sumMetricQuery)s) /
108-
sum(%(countMetricQuery)s)
108+
%(multiplier)ssum%(sumBy)s(%(sumMetricQuery)s) /
109+
sum%(sumBy)s(%(countMetricQuery)s)
109110
||| % {
110111
sumMetricQuery: $.ncHistogramSumRate(metric, selector, rate_interval, from_recording).classic,
111112
countMetricQuery: $.ncHistogramCountRate(metric, selector, rate_interval, from_recording).classic,
112113
multiplier: multiplierStr,
114+
sumBy: sumBy,
113115
},
114116
native: |||
115-
%(multiplier)ssum(%(sumMetricQuery)s) /
116-
sum(%(countMetricQuery)s)
117+
%(multiplier)ssum%(sumBy)s(%(sumMetricQuery)s) /
118+
sum%(sumBy)s(%(countMetricQuery)s)
117119
||| % {
118120
sumMetricQuery: $.ncHistogramSumRate(metric, selector, rate_interval, from_recording).native,
119121
countMetricQuery: $.ncHistogramCountRate(metric, selector, rate_interval, from_recording).native,
120122
multiplier: multiplierStr,
123+
sumBy: sumBy,
121124
},
122125
},
123126

@@ -167,6 +170,16 @@ local g = import 'grafana-builder/grafana.libsonnet';
167170
},
168171
},
169172

173+
// ncHistogramApplyTemplate (native classic histogram template applier)
174+
// Takes a template like 'label_replace(%s, "x", "$1", "y", "(.*)")'
175+
// with a single %s substitution and applies it to both the classic and native
176+
// histogram query.
177+
ncHistogramApplyTemplate(template, query):: {
178+
assert $.isNativeClassicQuery(query),
179+
native: template % query.native,
180+
classic: template % query.classic,
181+
},
182+
170183
// ncHistogramComment (native classic histogram comment) helps attach
171184
// comments to the query and also keep multiline strings where applicable.
172185
ncHistogramComment(query, comment):: {

mongodb-mixin/README.md

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
1-
# MongoDB Mixin
1+
# MongoDB mixin
22

3-
The MongoDB Mixin is a set of configurable, reusable, and extensible alerts and dashboards based on the metrics exported by [Percona MongoDB Exporter](https://github.com/percona/mongodb_exporter).
3+
The MongoDB mixin is a set of configurable, reusable, and extensible alerts and dashboards based on the metrics exported by [Percona MongoDB Exporter](https://github.com/percona/mongodb_exporter).
44

55
This mixin includes 3 dashboards suited for MongoDB, namely MongoDB Cluster, MongoDB Instance and MongoDB ReplicaSet.
66

Lines changed: 140 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,140 @@
1+
local xtd = import 'github.com/jsonnet-libs/xtd/main.libsonnet';
2+
{
3+
prometheusAlerts+:: {
4+
local config =
5+
$._config
6+
{
7+
agg: std.join(',', $._config.groupLabels + $._config.instanceLabels),
8+
aggCluster: std.join(',', $._config.groupLabels),
9+
instanceLabel: xtd.array.slice($._config.instanceLabels, -1)[0],
10+
groupLabel: xtd.array.slice($._config.groupLabels, -1)[0],
11+
12+
},
13+
groups+: [
14+
{
15+
name: 'MongodbAlerts',
16+
rules: [
17+
{
18+
alert: 'MongodbDown',
19+
annotations: {
20+
summary: 'MongoDB instance is down.',
21+
description: 'MongoDB instance {{ $labels.%(instanceLabel)s }} is down.' % config,
22+
},
23+
expr: 'mongodb_up{%(filteringSelector)s} == 0' % config,
24+
'for': '5m',
25+
labels: {
26+
severity: 'critical',
27+
},
28+
},
29+
{
30+
alert: 'MongodbReplicaMemberUnhealthy',
31+
annotations: {
32+
description: 'MongoDB replica member is unhealthy (instance {{ $labels.%(instanceLabel)s }}).' % config,
33+
summary: 'MongoDB replica member is unhealthy.',
34+
},
35+
expr: 'mongodb_mongod_replset_member_health{%(filteringSelector)s} == 0' % config,
36+
labels: {
37+
severity: 'critical',
38+
},
39+
},
40+
{
41+
alert: 'MongodbReplicationLag',
42+
annotations: {
43+
description: 'MongoDB replication lag is more than 60s (instance {{ $labels.%(instanceLabel)s }})' % config,
44+
summary: 'MongoDB replication lag is exceeding the threshold.',
45+
},
46+
expr: 'mongodb_mongod_replset_member_replication_lag{state="SECONDARY", %(filteringSelector)s} > 60' % config,
47+
'for': '5m',
48+
labels: {
49+
severity: 'critical',
50+
},
51+
},
52+
{
53+
alert: 'MongodbReplicationHeadroom',
54+
annotations: {
55+
description: 'MongoDB replication headroom is <= 0 for {{ $labels.%(groupLabel)s }}.' % config,
56+
summary: 'MongoDB replication headroom is exceeding the threshold.',
57+
},
58+
// Headroom = oplog window (tail - head) minus replication lag (primary optime - secondary optime).
// Every avg() is grouped by the same cluster labels so the binary operators can match
// series one-to-one; an ungrouped avg() yields a label-less series that never matches
// a grouped one, leaving the expression permanently empty.
// The filtering selector is applied to every metric so the rule only considers the selected targets.
expr: '(avg by (%(aggCluster)s) (mongodb_mongod_replset_oplog_tail_timestamp{%(filteringSelector)s} - mongodb_mongod_replset_oplog_head_timestamp{%(filteringSelector)s}) - (avg by (%(aggCluster)s) (mongodb_mongod_replset_member_optime_date{state="PRIMARY",%(filteringSelector)s}) - avg by (%(aggCluster)s) (mongodb_mongod_replset_member_optime_date{state="SECONDARY",%(filteringSelector)s}))) <= 0' % config,
59+
'for': '5m',
60+
labels: {
61+
severity: 'critical',
62+
},
63+
},
64+
{
65+
alert: 'MongodbNumberCursorsOpen',
66+
annotations: {
67+
description: 'Too many cursors opened by MongoDB for clients (> 10k) on {{ $labels.%(instanceLabel)s }}.' % config,
68+
summary: 'MongoDB number of cursors open too high.',
69+
},
70+
expr: 'mongodb_mongod_metrics_cursor_open{state="total", %(filteringSelector)s} > 10 * 1000' % config,
71+
'for': '2m',
72+
labels: {
73+
severity: 'warning',
74+
},
75+
},
76+
{
77+
alert: 'MongodbCursorsTimeouts',
78+
annotations: {
79+
description: 'Too many cursors are timing out on {{ $labels.%(instanceLabel)s }}.' % config,
80+
summary: 'MongoDB cursors timeouts are exceeding the threshold.',
81+
},
82+
expr: 'increase(mongodb_mongod_metrics_cursor_timed_out_total{%(filteringSelector)s}[1m]) > 100' % config,
83+
'for': '2m',
84+
labels: {
85+
severity: 'warning',
86+
},
87+
},
88+
{
89+
alert: 'MongodbTooManyConnections',
90+
annotations: {
91+
description: 'Too many connections to MongoDB instance {{ $labels.%(instanceLabel)s }} (> 80%%).' % config,
92+
summary: 'MongoDB has too many connections.',
93+
},
94+
// Ratio of "current" connections to the per-instance total across all connection states.
// The filtering selector is applied to the denominator as well, so series outside the
// selected targets cannot inflate the total.
// NOTE(review): rate() is applied to mongodb_connections, which looks like a gauge — confirm
// whether the raw value was intended for the numerator.
expr: 'avg by (%(agg)s) (rate(mongodb_connections{state="current",%(filteringSelector)s}[1m])) / avg by (%(agg)s) (sum (mongodb_connections{%(filteringSelector)s}) by (%(agg)s)) * 100 > 80' % config,
95+
'for': '2m',
96+
labels: {
97+
severity: 'warning',
98+
},
99+
},
100+
{
101+
alert: 'MongodbVirtualMemoryUsage',
102+
annotations: {
103+
description: 'MongoDB virtual memory usage is too high on {{ $labels.%(instanceLabel)s }}.' % config,
104+
summary: 'MongoDB high memory usage.',
105+
},
106+
expr: '(sum(mongodb_memory{type="virtual",%(filteringSelector)s}) by (%(agg)s) / sum(mongodb_memory{type="mapped",%(filteringSelector)s}) by (%(agg)s)) > 3' % config,
107+
'for': '5m',
108+
labels: {
109+
severity: 'warning',
110+
},
111+
},
112+
{
113+
alert: 'MongodbReadRequestsQueueingUp',
114+
annotations: {
115+
description: 'MongoDB requests are queuing up on {{ $labels.%(instanceLabel)s }}.' % config,
116+
summary: 'MongoDB read requests are queuing up.',
117+
},
118+
expr: 'delta(mongodb_mongod_global_lock_current_queue{type="reader",%(filteringSelector)s}[1m]) > 0' % config,
119+
'for': '5m',
120+
labels: {
121+
severity: 'warning',
122+
},
123+
},
124+
{
125+
alert: 'MongodbWriteRequestsQueueingUp',
126+
annotations: {
127+
description: 'MongoDB write requests are queueing up on {{ $labels.%(instanceLabel)s }}.' % config,
128+
summary: 'MongoDB write requests are queueing up.',
129+
},
130+
expr: 'delta(mongodb_mongod_global_lock_current_queue{type="writer",%(filteringSelector)s}[1m]) > 0' % config,
131+
'for': '5m',
132+
labels: {
133+
severity: 'warning',
134+
},
135+
},
136+
],
137+
},
138+
],
139+
},
140+
}

mongodb-mixin/alerts/mongodbAlerts.yaml

Lines changed: 0 additions & 92 deletions
This file was deleted.

mongodb-mixin/config.libsonnet

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
{
2+
_config+:: {
3+
// used only in alerts
4+
filteringSelector: '',
5+
groupLabels: ['job', 'mongodb_cluster'],
6+
instanceLabels: ['service_name'],
7+
},
8+
}

0 commit comments

Comments
 (0)