Skip to content

Commit 34d1cd6

Browse files
dashboard: separate replication panels
Move replication panels to the "Replication overview" section.

This patch is related to the following metrics:
- tnt_clock_delta
- tnt_replication_lag
- tnt_replication_status

Part of #178
1 parent 51f3d6f commit 34d1cd6

27 files changed

+10831
-10442
lines changed

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
1919
- Set InfluxDB `policy` dynamically
2020
- Set datasource dynamically
2121
- Use expirationd module metrics in TDG dashboard
22+
- Move replication panels to a separate row
2223

2324
### Fixed
2425
- TDG dashboard latency units (graphql, iproto, rest requests)

dashboard/build/influxdb/dashboard_raw.libsonnet

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,14 @@ function(
5555
measurement=measurement,
5656
alias=alias,
5757
)
58+
).addPanels(
59+
section.replication(
60+
datasource_type=variable.datasource_type.influxdb,
61+
datasource=datasource,
62+
policy=policy,
63+
measurement=measurement,
64+
alias=alias,
65+
)
5866
).addPanels(
5967
section.http(
6068
datasource_type=variable.datasource_type.influxdb,

dashboard/build/influxdb/tdg_dashboard_raw.libsonnet

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,14 @@ function(
5555
measurement=measurement,
5656
alias=alias,
5757
)
58+
).addPanels(
59+
section.replication(
60+
datasource_type=variable.datasource_type.influxdb,
61+
datasource=datasource,
62+
policy=policy,
63+
measurement=measurement,
64+
alias=alias,
65+
)
5866
).addPanels(
5967
section.net(
6068
datasource_type=variable.datasource_type.influxdb,

dashboard/build/prometheus/dashboard_raw.libsonnet

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -63,6 +63,13 @@ function(
6363
job=job,
6464
alias=alias,
6565
)
66+
).addPanels(
67+
section.replication(
68+
datasource_type=variable.datasource_type.prometheus,
69+
datasource=datasource,
70+
job=job,
71+
alias=alias,
72+
)
6673
).addPanels(
6774
section.http(
6875
datasource_type=variable.datasource_type.prometheus,

dashboard/build/prometheus/tdg_dashboard_raw.libsonnet

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -63,6 +63,13 @@ function(
6363
job=job,
6464
alias=alias,
6565
)
66+
).addPanels(
67+
section.replication(
68+
datasource_type=variable.datasource_type.prometheus,
69+
datasource=datasource,
70+
job=job,
71+
alias=alias,
72+
)
6673
).addPanels(
6774
section.net(
6875
datasource_type=variable.datasource_type.prometheus,

dashboard/panels/cluster.libsonnet

Lines changed: 2 additions & 130 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,6 @@ local timeseries = import 'dashboard/grafana/timeseries.libsonnet';
44
local common = import 'dashboard/panels/common.libsonnet';
55
local variable = import 'dashboard/variable.libsonnet';
66

7-
local graph = grafana.graphPanel;
87
local statPanel = grafana.statPanel;
98
local tablePanel = grafana.tablePanel;
109
local influxdb = grafana.influxdb;
@@ -406,50 +405,6 @@ local prometheus = grafana.prometheus;
406405
level='critical',
407406
),
408407

409-
replication_status(
410-
title='Tarantool replication status',
411-
description=|||
412-
`follows` status means replication is running.
413-
Otherwise, `not running` is displayed.
414-
415-
Panel works with `metrics >= 0.13.0` and Grafana 8.x.
416-
|||,
417-
datasource_type=null,
418-
datasource=null,
419-
policy=null,
420-
measurement=null,
421-
job=null,
422-
alias=null,
423-
):: timeseries.new(
424-
title=title,
425-
description=description,
426-
datasource=datasource,
427-
panel_width=8,
428-
max=1,
429-
min=0,
430-
).addValueMapping(
431-
1, 'green', 'follows'
432-
).addValueMapping(
433-
0, 'red', 'not running'
434-
).addRangeMapping(
435-
0.001, 0.999, '-'
436-
).addTarget(
437-
if datasource_type == variable.datasource_type.prometheus then
438-
prometheus.target(
439-
expr=std.format('tnt_replication_status{job=~"%s",alias=~"%s"}', [job, alias]),
440-
legendFormat='{{alias}} {{stream}} ({{id}})',
441-
)
442-
else if datasource_type == variable.datasource_type.influxdb then
443-
influxdb.target(
444-
policy=policy,
445-
measurement=measurement,
446-
group_tags=['label_pairs_alias', 'label_pairs_stream', 'label_pairs_id'],
447-
alias='$tag_label_pairs_alias $tag_label_pairs_stream ($tag_label_pairs_id)',
448-
fill='null',
449-
).where('metric_name', '=', 'tnt_replication_status').where('label_pairs_alias', '=~', alias)
450-
.selectField('value').addConverter('last')
451-
),
452-
453408
failovers_per_second(
454409
title='Failovers triggered',
455410
description=|||
@@ -469,7 +424,7 @@ local prometheus = grafana.prometheus;
469424
description=description,
470425
datasource=datasource,
471426
labelY1='failovers per second',
472-
panel_width=8,
427+
panel_width=12,
473428
).addTarget(common.default_rps_target(
474429
datasource_type,
475430
'tnt_cartridge_failover_trigger_total',
@@ -498,7 +453,7 @@ local prometheus = grafana.prometheus;
498453
title=title,
499454
description=description,
500455
datasource=datasource,
501-
panel_width=8,
456+
panel_width=12,
502457
max=1,
503458
min=0,
504459
).addValueMapping(
@@ -518,87 +473,4 @@ local prometheus = grafana.prometheus;
518473
'last'
519474
)
520475
),
521-
522-
replication_lag(
523-
title='Tarantool replication lag',
524-
description=|||
525-
Replication lag value for Tarantool instance.
526-
527-
Panel works with `metrics >= 0.13.0`.
528-
|||,
529-
datasource_type=null,
530-
datasource=null,
531-
policy=null,
532-
measurement=null,
533-
job=null,
534-
alias=null,
535-
):: common.default_graph(
536-
title=title,
537-
description=description,
538-
datasource=datasource,
539-
format='s',
540-
decimals=null,
541-
decimalsY1=null,
542-
legend_avg=false,
543-
min=0,
544-
panel_width=12,
545-
).addTarget(
546-
if datasource_type == variable.datasource_type.prometheus then
547-
prometheus.target(
548-
expr=std.format('tnt_replication_lag{job=~"%s",alias=~"%s"}', [job, alias]),
549-
legendFormat='{{alias}} ({{id}})',
550-
)
551-
else if datasource_type == variable.datasource_type.influxdb then
552-
influxdb.target(
553-
policy=policy,
554-
measurement=measurement,
555-
group_tags=['label_pairs_alias', 'label_pairs_id'],
556-
alias='$tag_label_pairs_alias ($tag_label_pairs_id)',
557-
fill='null',
558-
).where('metric_name', '=', 'tnt_replication_lag').where('label_pairs_alias', '=~', alias)
559-
.selectField('value').addConverter('mean')
560-
),
561-
562-
clock_delta(
563-
title='Instances clock delta',
564-
description=|||
565-
Clock drift across the cluster.
566-
max shows difference with the fastest clock (always positive),
567-
min shows difference with the slowest clock (always negative).
568-
569-
Panel works with `metrics >= 0.10.0`.
570-
|||,
571-
datasource_type=null,
572-
datasource=null,
573-
policy=null,
574-
measurement=null,
575-
job=null,
576-
alias=null,
577-
):: common.default_graph(
578-
title=title,
579-
description=description,
580-
datasource=datasource,
581-
format='s',
582-
decimals=null,
583-
decimalsY1=null,
584-
fill=1,
585-
legend_avg=false,
586-
legend_max=false,
587-
panel_width=12,
588-
).addTarget(
589-
if datasource_type == variable.datasource_type.prometheus then
590-
prometheus.target(
591-
expr=std.format('tnt_clock_delta{job=~"%s",alias=~"%s"}', [job, alias]),
592-
legendFormat='{{alias}} ({{delta}})',
593-
)
594-
else if datasource_type == variable.datasource_type.influxdb then
595-
influxdb.target(
596-
policy=policy,
597-
measurement=measurement,
598-
group_tags=['label_pairs_alias', 'label_pairs_delta'],
599-
alias='$tag_label_pairs_alias ($tag_label_pairs_delta)',
600-
fill='null',
601-
).where('metric_name', '=', 'tnt_clock_delta').where('label_pairs_alias', '=~', alias)
602-
.selectField('value').addConverter('last')
603-
),
604476
}
Lines changed: 140 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,140 @@
1+
local grafana = import 'grafonnet/grafana.libsonnet';
2+
3+
local timeseries = import 'dashboard/grafana/timeseries.libsonnet';
4+
local common = import 'dashboard/panels/common.libsonnet';
5+
local variable = import 'dashboard/variable.libsonnet';
6+
7+
local graph = grafana.graphPanel;
8+
local influxdb = grafana.influxdb;
9+
local prometheus = grafana.prometheus;
10+
11+
{
12+
row:: common.row('Replication overview'),
13+
14+
replication_status(
15+
title='Tarantool replication status',
16+
description=|||
17+
`follows` status means replication is running.
18+
Otherwise, `not running` is displayed.
19+
20+
Panel works with `metrics >= 0.13.0` and Grafana 8.x.
21+
|||,
22+
datasource_type=null,
23+
datasource=null,
24+
policy=null,
25+
measurement=null,
26+
job=null,
27+
alias=null,
28+
):: timeseries.new(
29+
title=title,
30+
description=description,
31+
datasource=datasource,
32+
panel_width=8,
33+
max=1,
34+
min=0,
35+
).addValueMapping(
36+
1, 'green', 'follows'
37+
).addValueMapping(
38+
0, 'red', 'not running'
39+
).addRangeMapping(
40+
0.001, 0.999, '-'
41+
).addTarget(
42+
if datasource_type == variable.datasource_type.prometheus then
43+
prometheus.target(
44+
expr=std.format('tnt_replication_status{job=~"%s",alias=~"%s"}', [job, alias]),
45+
legendFormat='{{alias}} {{stream}} ({{id}})',
46+
)
47+
else if datasource_type == variable.datasource_type.influxdb then
48+
influxdb.target(
49+
policy=policy,
50+
measurement=measurement,
51+
group_tags=['label_pairs_alias', 'label_pairs_stream', 'label_pairs_id'],
52+
alias='$tag_label_pairs_alias $tag_label_pairs_stream ($tag_label_pairs_id)',
53+
fill='null',
54+
).where('metric_name', '=', 'tnt_replication_status').where('label_pairs_alias', '=~', alias)
55+
.selectField('value').addConverter('last')
56+
),
57+
58+
replication_lag(
59+
title='Tarantool replication lag',
60+
description=|||
61+
Replication lag value for Tarantool instance.
62+
63+
Panel works with `metrics >= 0.13.0`.
64+
|||,
65+
datasource_type=null,
66+
datasource=null,
67+
policy=null,
68+
measurement=null,
69+
job=null,
70+
alias=null,
71+
):: common.default_graph(
72+
title=title,
73+
description=description,
74+
datasource=datasource,
75+
format='s',
76+
decimals=null,
77+
decimalsY1=null,
78+
legend_avg=false,
79+
min=0,
80+
panel_width=8,
81+
).addTarget(
82+
if datasource_type == variable.datasource_type.prometheus then
83+
prometheus.target(
84+
expr=std.format('tnt_replication_lag{job=~"%s",alias=~"%s"}', [job, alias]),
85+
legendFormat='{{alias}} ({{id}})',
86+
)
87+
else if datasource_type == variable.datasource_type.influxdb then
88+
influxdb.target(
89+
policy=policy,
90+
measurement=measurement,
91+
group_tags=['label_pairs_alias', 'label_pairs_id'],
92+
alias='$tag_label_pairs_alias ($tag_label_pairs_id)',
93+
fill='null',
94+
).where('metric_name', '=', 'tnt_replication_lag').where('label_pairs_alias', '=~', alias)
95+
.selectField('value').addConverter('mean')
96+
),
97+
98+
clock_delta(
99+
title='Instances clock delta',
100+
description=|||
101+
Clock drift across the cluster.
102+
max shows difference with the fastest clock (always positive),
103+
min shows difference with the slowest clock (always negative).
104+
105+
Panel works with `metrics >= 0.10.0`.
106+
|||,
107+
datasource_type=null,
108+
datasource=null,
109+
policy=null,
110+
measurement=null,
111+
job=null,
112+
alias=null,
113+
):: common.default_graph(
114+
title=title,
115+
description=description,
116+
datasource=datasource,
117+
format='s',
118+
decimals=null,
119+
decimalsY1=null,
120+
fill=1,
121+
legend_avg=false,
122+
legend_max=false,
123+
panel_width=8,
124+
).addTarget(
125+
if datasource_type == variable.datasource_type.prometheus then
126+
prometheus.target(
127+
expr=std.format('tnt_clock_delta{job=~"%s",alias=~"%s"}', [job, alias]),
128+
legendFormat='{{alias}} ({{delta}})',
129+
)
130+
else if datasource_type == variable.datasource_type.influxdb then
131+
influxdb.target(
132+
policy=policy,
133+
measurement=measurement,
134+
group_tags=['label_pairs_alias', 'label_pairs_delta'],
135+
alias='$tag_label_pairs_alias ($tag_label_pairs_delta)',
136+
fill='null',
137+
).where('metric_name', '=', 'tnt_clock_delta').where('label_pairs_alias', '=~', alias)
138+
.selectField('value').addConverter('last')
139+
),
140+
}

0 commit comments

Comments
 (0)