Skip to content

Commit 8357c11

Browse files
DifferentialOrangevasiliy-t
authored andcommitted
Add cluster overview panels for Prometheus
Closes #18, #19
1 parent 5025103 commit 8357c11

File tree

5 files changed

+804
-77
lines changed

5 files changed

+804
-77
lines changed

CHANGELOG.md

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,11 @@ All notable changes to this project will be documented in this file.
44
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
55
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
66

7+
## [Unreleased]
8+
9+
### Added
10+
- Cluster overview panels for Prometheus
11+
712
## [0.2.0] - 2020-09-23
813
Grafana revisions: [InfluxDB revision 3](https://grafana.com/api/dashboards/12567/revisions/3/download), [Prometheus revision 1](https://grafana.com/api/dashboards/13054/revisions/1/download)
914

tarantool/cluster.libsonnet

Lines changed: 287 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,287 @@
1+
local grafana = import 'grafonnet/grafana.libsonnet';
2+
3+
local statPanel = grafana.statPanel;
4+
local tablePanel = grafana.tablePanel;
5+
local prometheus = grafana.prometheus;
6+
7+
{
8+
// Builds a table panel with one row per instance scraped by the given
// Prometheus job, showing the instance alias, its URI and its uptime.
//
// Parameters:
//   title       - panel title.
//   description - panel description; when null, a default text is used for
//                 the '${DS_PROMETHEUS}' datasource and null otherwise.
//   datasource  - datasource name; only '${DS_PROMETHEUS}' yields a target.
//   measurement - accepted for signature parity; not referenced in this body.
//   job         - Prometheus job selector substituted into the query.
//
// Raises an error (when the target is evaluated) for '${DS_INFLUXDB}' and
// for any other datasource value, instead of silently adding a null target.
health_overview_table(
  title='Cluster status overview',
  description=null,

  datasource=null,
  measurement=null,
  job=null,
):: tablePanel.new(
  title=title,
  description=(
    if description != null then
      description
    else if datasource == '${DS_PROMETHEUS}' then
      |||
        Overview of Tarantool instances, observed by Prometheus job.

        If instance row is *red*, it means Prometheus can't reach URI specified in targets or ran into error.
        If instance row is *green*, it means instance is up and running and
        Prometheus is successfully extracting metrics from it.
        "Uptime" column shows time since instance start.
      |||
    else
      null
  ),
  datasource=datasource,

  styles=[
    {
      alias: 'Instance alias',
      pattern: 'alias',
      thresholds: [],
      type: 'string',
      mappingType: 1,
    },
    {
      alias: 'Instance URI',
      pattern: 'instance',
      thresholds: [],
      type: 'string',
      mappingType: 1,
    },
    {
      alias: 'Uptime',
      // The whole row is colored according to the value of this column.
      colorMode: 'row',
      colors: [
        'rgba(245, 54, 54, 0.9)',
        'rgba(237, 129, 40, 0.89)',
        'rgba(50, 172, 45, 0.97)',
      ],
      decimals: 0,
      mappingType: 1,
      pattern: 'Value',
      // NOTE(review): both thresholds are identical, presumably so that the
      // middle (orange) color is never shown and rows are only red or green,
      // matching the panel description - confirm against Grafana behavior.
      thresholds: ['0.1', '0.1'],
      type: 'number',
      unit: 's',
    },
  ],
  sort={
    col: 2,
    desc: false,
  },
  transform='table',
).hideColumn('job').hideColumn('__name__').hideColumn('Time').addTarget(
  if datasource == '${DS_PROMETHEUS}' then
    prometheus.target(
      // `up` multiplied by the uptime metric gives the uptime for reachable
      // instances (and pulls in the `alias` label); the `or` branch adds rows
      // for targets without a matching uptime series, with alias set to
      // "Not available". The regex matcher `=~` is used for `job` so this
      // query selects the same jobs as the stat panels in this file.
      expr=std.format(
        |||
          up{job=~"%s"} * on(instance) group_left(alias) tnt_info_uptime{job=~"%s"} or
          on(instance) label_replace(up{job=~"%s"}, "alias", "Not available", "instance", ".*")
        |||,
        [job, job, job]
      ),
      format='table',
      instant=true,
    )
  else if datasource == '${DS_INFLUXDB}' then
    error 'InfluxDB target not supported yet'
  else
    // Without this branch the `if` evaluates to null and a null target is
    // appended to the panel.
    error std.format('Unsupported datasource: %s', [datasource])
),
89+
// Workaround for missing options.fieldOptions.defaults.title https://github.com/grafana/grafonnet-lib/pull/260
// Returns `stat_panel` with `options.fieldOptions.defaults.title` set to
// `title`; all other fields of the nested objects are kept.
local title_workaround(stat_panel, title) = (
  local panel_options = stat_panel.options;
  local field_options = stat_panel.options.fieldOptions;
  local field_defaults = stat_panel.options.fieldOptions.defaults;

  stat_panel {
    options: panel_options {
      fieldOptions: field_options {
        defaults: field_defaults { title: title },
      },
    },
  }
),
103+
104+
// Shared builder for the single-value overview panels in this file.
//
// Creates a stat panel with two threshold steps (red as the base step,
// green from 0.1), attaches one Prometheus target built from `expr`, and
// sets the in-panel title to `stat_title` through title_workaround.
// A null `title` is replaced by an empty string.
// `measurement` and `job` are accepted but not referenced in this body.
local overview_stat(
  title,
  description,
  datasource,
  measurement=null,
  job=null,
  stat_title=null,
  decimals=null,
  unit=null,
  expr=null,
) = (
  local panel_title = if title == null then '' else title;
  local base_step = { color: 'red', value: null };
  local ok_step = { color: 'green', value: 0.1 };

  local base_panel = statPanel.new(
    title=panel_title,
    description=description,

    datasource=datasource,
    colorMode='value',
    decimals=decimals,
    unit=unit,
    reducerFunction='last',
    pluginVersion='6.6.0',
  );

  local panel_with_target =
    base_panel
    .addThreshold(base_step)
    .addThreshold(ok_step)
    .addTarget(prometheus.target(expr=expr));

  title_workaround(panel_with_target, stat_title)
),
132+
133+
// Stat panel with the sum of `up` over the instances matched by `job`.
// When `description` is null, a default text is used for the
// '${DS_PROMETHEUS}' datasource and null for any other datasource.
health_overview_stat(
  title='',
  description=null,

  datasource=null,
  measurement=null,
  job=null,
):: (
  local prometheus_description = |||
    Count of running Tarantool instances, observed by Prometheus job.
    If Prometheus can't reach URI specified in targets or ran into error, instance is not counted.
  |||;
  local default_description =
    if datasource == '${DS_PROMETHEUS}' then prometheus_description else null;

  overview_stat(
    title=title,
    description=if description == null then default_description else description,
    datasource=datasource,
    measurement=measurement,
    job=job,
    stat_title='Total instances running:',
    decimals=0,
    unit='none',
    expr=std.format('sum(up{job=~"%s"})', job),
  )
),
163+
164+
// Stat panel with the sum of `tnt_slab_arena_used` over the instances
// matched by `job`, displayed in bytes with two decimals.
// When `description` is null, a default text is used for the
// '${DS_PROMETHEUS}' datasource and null for any other datasource.
memory_used_stat(
  title='',
  description=null,

  datasource=null,
  measurement=null,
  job=null,
):: (
  local prometheus_description = |||
    Overall value of memory used by Tarantool items and indexes (*arena_used* value).
    If Tarantool instance is not available for Prometheus metrics extraction now, its contribution is not counted.
  |||;
  local default_description =
    if datasource == '${DS_PROMETHEUS}' then prometheus_description else null;

  overview_stat(
    title=title,
    description=if description == null then default_description else description,
    datasource=datasource,
    measurement=measurement,
    job=job,
    stat_title='Overall memory used:',
    decimals=2,
    unit='bytes',
    expr=std.format('sum(tnt_slab_arena_used{job=~"%s"})', job),
  )
),
194+
195+
// Stat panel with the sum of `tnt_slab_quota_size` over the instances
// matched by `job`, displayed in bytes with two decimals.
// When `description` is null, a default text is used for the
// '${DS_PROMETHEUS}' datasource and null for any other datasource.
memory_reserved_stat(
  title='',
  description=null,

  datasource=null,
  measurement=null,
  job=null,
):: (
  local prometheus_description = |||
    Overall value of memory available for Tarantool items and indexes allocation (*memtx_memory* or *quota_size* values).
    If Tarantool instance is not available for Prometheus metrics extraction now, its contribution is not counted.
  |||;
  local default_description =
    if datasource == '${DS_PROMETHEUS}' then prometheus_description else null;

  overview_stat(
    title=title,
    description=if description == null then default_description else description,
    datasource=datasource,
    measurement=measurement,
    job=job,
    stat_title='Overall memory reserved:',
    decimals=2,
    unit='bytes',
    expr=std.format('sum(tnt_slab_quota_size{job=~"%s"})', job),
  )
),
225+
226+
// Stat panel with the summed 1-minute rate of `tnt_stats_op_total` over the
// instances matched by `job`, displayed in ops with two decimals.
// When `description` is null, a default text is used for the
// '${DS_PROMETHEUS}' datasource and null for any other datasource.
space_ops_stat(
  title='',
  description=null,

  datasource=null,
  measurement=null,
  job=null,
):: (
  local prometheus_description = |||
    Overall rate of operations performed on Tarantool spaces (*select*, *insert*, *update* etc.).
    If Tarantool instance is not available for Prometheus metrics extraction now, its contribution is not counted.
  |||;
  local default_description =
    if datasource == '${DS_PROMETHEUS}' then prometheus_description else null;

  overview_stat(
    title=title,
    description=if description == null then default_description else description,
    datasource=datasource,
    measurement=measurement,
    job=job,
    stat_title='Overall space load:',
    decimals=2,
    unit='ops',
    expr=std.format('sum(rate(tnt_stats_op_total{job=~"%s"}[1m]))', job),
  )
),
256+
257+
// Stat panel with the summed 1-minute rate of
// `http_server_request_latency_count` over the instances matched by `job`,
// displayed in reqps with two decimals.
// When `description` is null, a default text is used for the
// '${DS_PROMETHEUS}' datasource and null for any other datasource.
http_rps_stat(
  title='',
  description=null,

  datasource=null,
  measurement=null,
  job=null,
):: (
  local prometheus_description = |||
    Overall rate of requests processed on Tarantool instances (all methods and response codes).
    If Tarantool instance is not available for Prometheus metrics extraction now, its contribution is not counted.
  |||;
  local default_description =
    if datasource == '${DS_PROMETHEUS}' then prometheus_description else null;

  overview_stat(
    title=title,
    description=if description == null then default_description else description,
    datasource=datasource,
    measurement=measurement,
    job=job,
    stat_title='Overall HTTP load:',
    decimals=2,
    unit='reqps',
    expr=std.format('sum(rate(http_server_request_latency_count{job=~"%s"}[1m]))', job),
  )
),
287+
}

0 commit comments

Comments
 (0)