Skip to content

Commit 02172c2

Browse files
dashboard: panels with CPU and memory utilization
This patch adds a panel with total CPU utilization per instance, and panels with memory and virtual memory utilization, both per instance and in total. Closes #TNTP-4365
1 parent a225559 commit 02172c2

File tree

6 files changed

+149
-9
lines changed

6 files changed

+149
-9
lines changed

CHANGELOG.md

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
88

99
### Added
1010
- Panel with Cartridge configuration checksum (#242)
11-
11+
- Panel with CPU total utilization per instance (#TNTP-4365)
12+
- Panels with memory/virtual memory utilization per instance and total (#TNTP-4365)
1213

1314
## [3.2.1] - 2024-12-06
1415
Grafana revisions:

dashboard/panels/cpu.libsonnet

Lines changed: 65 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
local grafana = import 'grafonnet/grafana.libsonnet';
22

33
local common = import 'dashboard/panels/common.libsonnet';
4+
local common_utils = import 'dashboard/panels/common.libsonnet';
45
local variable = import 'dashboard/variable.libsonnet';
56

67
local influxdb = grafana.influxdb;
@@ -21,7 +22,7 @@ local prometheus = grafana.prometheus;
2122
format='percentunit',
2223
decimalsY1=0,
2324
min=0,
24-
panel_width=12,
25+
panel_width=8,
2526
).addTarget(
2627
common.target(cfg, metric_name, rate=true)
2728
),
@@ -60,6 +61,69 @@ local prometheus = grafana.prometheus;
6061
metric_name='tnt_cpu_system_time',
6162
),
6263

64+
local getrusage_cpu_total_percentage_graph(
65+
cfg, title, description, metric_user, metric_system,
66+
) = common.default_graph(
67+
cfg,
68+
title=title,
69+
description=description,
70+
format='percentunit',
71+
decimalsY1=0,
72+
min=0,
73+
panel_width=8,
74+
).addTarget(
75+
if cfg.type == variable.datasource_type.prometheus then
76+
prometheus.target(
77+
expr=std.format(
78+
'rate(%s[$__rate_interval]) + rate(%s[$__rate_interval])',
79+
[
80+
metric_user,
81+
metric_system,
82+
]
83+
),
84+
legendFormat='{{alias}}'
85+
)
86+
else if cfg.type == variable.datasource_type.influxdb then
87+
local filters = common_utils.influxdb_query_filters(cfg.filters);
88+
influxdb.target(
89+
rawQuery=true,
90+
query=std.format(|||
91+
SELECT mean("%(metrics_prefix)s%(metric_user)s") + mean("%(metrics_prefix)s%(metric_system)s")
92+
as "total" FROM
93+
(SELECT "value" as "%(metrics_prefix)s%(metric_user)s" FROM %(policy_prefix)s"%(measurement)s"
94+
WHERE ("metric_name" = '%(metrics_prefix)s%(metric_user)s' %(filters)s),
95+
(SELECT "value" as "%(metrics_prefix)s%(metric_system)s" FROM %(policy_prefix)s"%(measurement)s"
96+
WHERE ("metric_name" = '%(metrics_prefix)s%(metric_system)s' %(filters)s))
97+
GROUP BY time($__interval), "label_pairs_alias", "label_pairs_name" fill(none)
98+
|||, {
99+
metrics_prefix: cfg.metrics_prefix,
100+
metric_user: metric_user,
101+
metric_system: metric_system,
102+
policy_prefix: if cfg.policy == 'default' then '' else std.format('"%(policy)s".', cfg.policy),
103+
measurement: cfg.measurement,
104+
filters: if filters == '' then '' else std.format('AND %s', filters),
105+
}),
106+
alias='$tag_label_pairs_name — $tag_label_pairs_alias'
107+
),
108+
),
109+
110+
getrusage_cpu_total_time(
111+
cfg,
112+
title='CPU total time',
113+
description=|||
114+
This is the average share of time spent
115+
by instance process executing.
116+
117+
Panel minimal requirements: metrics 0.8.0.
118+
|||,
119+
):: getrusage_cpu_total_percentage_graph(
120+
cfg=cfg,
121+
title=title,
122+
description=description,
123+
metric_user='tnt_cpu_user_time',
124+
metric_system='tnt_cpu_system_time',
125+
),
126+
63127
local procstat_thread_time_graph(
64128
cfg,
65129
title,

dashboard/panels/runtime.libsonnet

Lines changed: 66 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,74 @@
1+
local grafana = import 'grafonnet/grafana.libsonnet';
2+
local prometheus = grafana.prometheus;
3+
local influxdb = grafana.influxdb;
4+
15
local common = import 'dashboard/panels/common.libsonnet';
6+
local variable = import 'dashboard/variable.libsonnet';
27

38
{
49
row:: common.row('Tarantool runtime overview'),
510

11+
local aggregate_expr(cfg, metric_name, aggregate='sum', rate=false) =
12+
local inner_expr = std.format(
13+
'%s%s{%s}',
14+
[
15+
cfg.metrics_prefix,
16+
metric_name,
17+
common.prometheus_query_filters(common.remove_field(cfg.filters, 'alias')),
18+
]
19+
);
20+
std.format(
21+
'%s(%s)',
22+
[
23+
aggregate,
24+
if rate then std.format('rate(%s[$__rate_interval])', inner_expr) else inner_expr,
25+
]
26+
),
27+
28+
total_memory(
29+
cfg,
30+
title='Total memory',
31+
description=|||
32+
Total memory used by Tarantool instance.
33+
|||,
34+
):: common.default_graph(
35+
cfg,
36+
title=title,
37+
description=description,
38+
format='bytes',
39+
labelY1='in bytes',
40+
panel_width=12,
41+
).addTarget(
42+
common.target(cfg, 'tnt_memory')
43+
).addTarget(
44+
if cfg.type == variable.datasource_type.prometheus then
45+
prometheus.target(expr=aggregate_expr(cfg, 'tnt_memory'),)
46+
else if cfg.type == variable.datasource_type.influxdb then
47+
influxdb.target()
48+
),
49+
50+
virt_memory(
51+
cfg,
52+
title='Virtual memory',
53+
description=|||
54+
Total virtual memory used by Tarantool instance.
55+
|||,
56+
):: common.default_graph(
57+
cfg,
58+
title=title,
59+
description=description,
60+
format='bytes',
61+
labelY1='in bytes',
62+
panel_width=12,
63+
).addTarget(
64+
common.target(cfg, 'tnt_memory_virt')
65+
).addTarget(
66+
if cfg.type == variable.datasource_type.prometheus then
67+
prometheus.target(expr=aggregate_expr(cfg, 'tnt_memory_virt'),)
68+
else if cfg.type == variable.datasource_type.influxdb then
69+
influxdb.target()
70+
),
71+
672
lua_memory(
773
cfg,
874
title='Lua memory',

dashboard/section.libsonnet

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -223,12 +223,14 @@ local vinyl = import 'dashboard/panels/vinyl.libsonnet';
223223

224224
cpu(cfg):: [
225225
cpu.row,
226+
cpu.getrusage_cpu_total_time(cfg),
226227
cpu.getrusage_cpu_user_time(cfg),
227228
cpu.getrusage_cpu_system_time(cfg),
228229
],
229230

230231
cpu_extended(cfg):: [
231232
cpu.row,
233+
cpu.getrusage_cpu_total_time(cfg),
232234
cpu.getrusage_cpu_user_time(cfg),
233235
cpu.getrusage_cpu_system_time(cfg),
234236
cpu.procstat_thread_user_time(cfg),
@@ -237,6 +239,8 @@ local vinyl = import 'dashboard/panels/vinyl.libsonnet';
237239

238240
runtime(cfg):: [
239241
runtime.row,
242+
runtime.total_memory(cfg),
243+
runtime.virt_memory(cfg),
240244
runtime.lua_memory(cfg),
241245
runtime.runtime_memory(cfg),
242246
runtime.memory_tx(cfg),

example_cluster/tarantool3_project/app.Dockerfile

Lines changed: 10 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
FROM golang:1.22-bullseye
1+
FROM golang:1.23-bullseye
22

33
WORKDIR /app
44

@@ -17,12 +17,15 @@ RUN curl -L https://tarantool.io/release/3/installer.sh | bash
1717
RUN DEBIAN_FRONTEND=noninteractive apt install -y tarantool tarantool-dev tt
1818

1919
RUN tt init
20-
# Need tt start -i
21-
RUN DEBIAN_FRONTEND=noninteractive apt install -y git patch
22-
RUN git clone https://github.com/magefile/mage && \
23-
cd mage && \
24-
go run bootstrap.go
25-
RUN tt install tt master
20+
21+
#RUN DEBIAN_FRONTEND=noninteractive apt install -y git patch
22+
#RUN git clone https://github.com/magefile/mage && \
23+
# cd mage && \
24+
# go run bootstrap.go
25+
#RUN tt install tt master
26+
#RUN tt install tarantool master
2627

2728
RUN tt rocks make
29+
2830
ENTRYPOINT tt start -i
31+

supported_metrics.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,8 @@ Format is as follows.
77

88
Based on [tarantool/metrics 1.2.0](https://github.com/tarantool/metrics/releases/tag/1.2.0).
99

10+
- [x] **tnt_memory**: see *Tarantool runtime overview/Total memory* panel (#TNTP-4365)
11+
- [x] **tnt_memory_virt**: see *Tarantool runtime overview/Virtual memory* panel (#TNTP-4365)
1012
- [x] **tnt_clock_delta**: see *Replication overview/Instances clock delta* panel ([#133](https://github.com/tarantool/grafana-dashboard/issues/133))
1113
- [x] **tnt_cpu_user_time**: see *Tarantool CPU statistics/CPU user time* panel ([#71](https://github.com/tarantool/grafana-dashboard/issues/71))
1214
- [x] **tnt_cpu_system_time**: see *Tarantool CPU statistics/CPU system time* panel ([#71](https://github.com/tarantool/grafana-dashboard/issues/71))

0 commit comments

Comments
 (0)