Skip to content

Commit d3f031e

Browse files
update cpu
1 parent 28b2dbc commit d3f031e

File tree

2 files changed

+104
-43
lines changed

2 files changed

+104
-43
lines changed

dashboard/panels/cpu.libsonnet

Lines changed: 93 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,24 @@ local prometheus = grafana.prometheus;
1010
{
1111
row:: common.row('Tarantool CPU statistics'),
1212

13+
local aggregate_expr(cfg, metric_name, aggregate='sum', rate=false) =
14+
local inner_expr = std.format(
15+
'%s%s{%s}',
16+
[
17+
cfg.metrics_prefix,
18+
metric_name,
19+
common.prometheus_query_filters(common.remove_field(cfg.filters, 'alias')),
20+
]
21+
);
22+
std.format(
23+
'%s(%s)',
24+
[
25+
aggregate,
26+
if rate then std.format('rate(%s[$__rate_interval])', inner_expr) else inner_expr,
27+
]
28+
),
29+
30+
// --------------------------------------------------------------------------
1331
local getrusage_cpu_percentage_graph(
1432
cfg,
1533
title,
@@ -27,9 +45,9 @@ local prometheus = grafana.prometheus;
2745
common.target(cfg, metric_name, rate=true)
2846
),
2947

30-
getrusage_cpu_user_time(
48+
getrusage_cpu_instance_user_time(
3149
cfg,
32-
title='CPU user time',
50+
title='CPU user time per instance',
3351
description=|||
3452
This is the average share of time
3553
spent by instance process executing in user mode.
@@ -44,9 +62,9 @@ local prometheus = grafana.prometheus;
4462
metric_name='tnt_cpu_user_time',
4563
),
4664

47-
getrusage_cpu_system_time(
65+
getrusage_cpu_instance_system_time(
4866
cfg,
49-
title='CPU system time',
67+
title='CPU system time per instance',
5068
description=|||
5169
This is the average share of time
5270
spent by instance process executing in kernel mode.
@@ -61,8 +79,9 @@ local prometheus = grafana.prometheus;
6179
metric_name='tnt_cpu_system_time',
6280
),
6381

82+
// --------------------------------------------------------------------------
6483
local getrusage_cpu_total_percentage_graph(
65-
cfg, title, description, metric_user, metric_system,
84+
cfg, title, description,
6685
) = common.default_graph(
6786
cfg,
6887
title=title,
@@ -74,41 +93,44 @@ local prometheus = grafana.prometheus;
7493
).addTarget(
7594
if cfg.type == variable.datasource_type.prometheus then
7695
prometheus.target(
77-
expr=std.format(
78-
'rate(%s[$__rate_interval]) + rate(%s[$__rate_interval])',
79-
[
80-
metric_user,
81-
metric_system,
82-
]
83-
),
96+
expr='rate(tnt_cpu_user_time[$__rate_interval]) + rate(tnt_cpu_system_time[$__rate_interval])',
8497
legendFormat='{{alias}}'
8598
)
8699
else if cfg.type == variable.datasource_type.influxdb then
87-
local filters = common_utils.influxdb_query_filters(cfg.filters);
88-
influxdb.target(
89-
rawQuery=true,
90-
query=std.format(|||
91-
SELECT mean("%(metrics_prefix)s%(metric_user)s") + mean("%(metrics_prefix)s%(metric_system)s")
92-
as "total" FROM
93-
(SELECT "value" as "%(metrics_prefix)s%(metric_user)s" FROM %(policy_prefix)s"%(measurement)s"
94-
WHERE ("metric_name" = '%(metrics_prefix)s%(metric_user)s' %(filters)s),
95-
(SELECT "value" as "%(metrics_prefix)s%(metric_system)s" FROM %(policy_prefix)s"%(measurement)s"
96-
WHERE ("metric_name" = '%(metrics_prefix)s%(metric_system)s' %(filters)s))
97-
GROUP BY time($__interval), "label_pairs_alias", "label_pairs_name" fill(none)
98-
|||, {
99-
metrics_prefix: cfg.metrics_prefix,
100-
metric_user: metric_user,
101-
metric_system: metric_system,
102-
policy_prefix: if cfg.policy == 'default' then '' else std.format('"%(policy)s".', cfg.policy),
103-
measurement: cfg.measurement,
104-
filters: if filters == '' then '' else std.format('AND %s', filters),
105-
}),
106-
alias='$tag_label_pairs_name — $tag_label_pairs_alias'
107-
),
100+
influxdb.target()
101+
),
102+
103+
getrusage_cpu_instance_total_time(
104+
cfg,
105+
title='CPU total time per instance',
106+
description=|||
107+
This is the average share of time spent
108+
by instance process executing.
109+
110+
Panel minimal requirements: metrics 0.8.0.
111+
|||,
112+
):: getrusage_cpu_total_percentage_graph(
113+
cfg=cfg,
114+
title=title,
115+
description=description,
116+
),
117+
118+
// --------------------------------------------------------------------------
119+
local getrusage_cpu_common_percentage_graph(
120+
cfg, title, description, expr,
121+
) = common.default_graph(
122+
cfg,
123+
title=title,
124+
description=description,
125+
format='percentunit',
126+
decimalsY1=0,
127+
min=0,
128+
panel_width=8,
108129
).addTarget(
109130
if cfg.type == variable.datasource_type.prometheus then
110131
prometheus.target(
111-
expr='sum(rate(tnt_cpu_user_time{job=~"$job"}[$__rate_interval])) + sum(rate(tnt_cpu_system_time{job=~"$job"}[$__rate_interval]))',
132+
expr=expr,
133+
legendFormat='{{alias}}'
112134
)
113135
else if cfg.type == variable.datasource_type.influxdb then
114136
influxdb.target()
@@ -118,19 +140,51 @@ local prometheus = grafana.prometheus;
118140
cfg,
119141
title='CPU total time',
120142
description=|||
121-
This is the average share of time spent
122-
by instance process executing.
143+
This is the total share of time spent
144+
by each instance process executing.
123145
124146
Panel minimal requirements: metrics 0.8.0.
125147
|||,
126-
):: getrusage_cpu_total_percentage_graph(
148+
):: getrusage_cpu_common_percentage_graph(
149+
cfg=cfg,
150+
title=title,
151+
description=description,
152+
expr='sum(rate(tnt_cpu_user_time{job=~"$job"}[$__rate_interval])) + sum(rate(tnt_cpu_system_time{job=~"$job"}[$__rate_interval]))',
153+
),
154+
155+
getrusage_cpu_total_user_time(
156+
cfg,
157+
title='CPU total user time',
158+
description=|||
159+
This is the total share of time
160+
spent in user mode.
161+
162+
Panel minimal requirements: metrics 0.8.0.
163+
|||,
164+
):: getrusage_cpu_common_percentage_graph(
165+
cfg=cfg,
166+
title=title,
167+
description=description,
168+
expr='sum(rate(tnt_cpu_user_time{job=~"$job"}[$__rate_interval]))',
169+
),
170+
171+
getrusage_cpu_total_system_time(
172+
cfg,
173+
title='CPU total system time',
174+
description=|||
175+
This is the total share of time
176+
spent in system mode.
177+
178+
Panel minimal requirements: metrics 0.8.0.
179+
|||,
180+
):: getrusage_cpu_common_percentage_graph(
127181
cfg=cfg,
128182
title=title,
129183
description=description,
130-
metric_user='tnt_cpu_user_time',
131-
metric_system='tnt_cpu_system_time',
184+
expr='sum(rate(tnt_cpu_system_time{job=~"$job"}[$__rate_interval]))',
132185
),
133186

187+
// --------------------------------------------------------------------------
134188
local procstat_thread_time_graph(
135189
cfg,
136190
title,

dashboard/section.libsonnet

Lines changed: 11 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -223,16 +223,23 @@ local vinyl = import 'dashboard/panels/vinyl.libsonnet';
223223

224224
cpu(cfg):: [
225225
cpu.row,
226+
cpu.getrusage_cpu_instance_total_time(cfg),
227+
cpu.getrusage_cpu_instance_user_time(cfg),
228+
cpu.getrusage_cpu_instance_system_time(cfg),
226229
cpu.getrusage_cpu_total_time(cfg),
227-
cpu.getrusage_cpu_user_time(cfg),
228-
cpu.getrusage_cpu_system_time(cfg),
230+
cpu.getrusage_cpu_total_user_time(cfg),
231+
cpu.getrusage_cpu_total_system_time(cfg),
232+
229233
],
230234

231235
cpu_extended(cfg):: [
232236
cpu.row,
237+
cpu.getrusage_cpu_instance_total_time(cfg),
238+
cpu.getrusage_cpu_instance_user_time(cfg),
239+
cpu.getrusage_cpu_instance_system_time(cfg),
233240
cpu.getrusage_cpu_total_time(cfg),
234-
cpu.getrusage_cpu_user_time(cfg),
235-
cpu.getrusage_cpu_system_time(cfg),
241+
cpu.getrusage_cpu_total_user_time(cfg),
242+
cpu.getrusage_cpu_total_system_time(cfg),
236243
cpu.procstat_thread_user_time(cfg),
237244
cpu.procstat_thread_system_time(cfg),
238245
],

0 commit comments

Comments
 (0)