11local  grafana = import  'grafonnet/grafana.libsonnet' ;
22
33local  common = import  'dashboard/panels/common.libsonnet' ;
4+ local  common_utils = import  'dashboard/panels/common.libsonnet' ;
45local  variable = import  'dashboard/variable.libsonnet' ;
56
67local  influxdb = grafana.influxdb;
@@ -21,14 +22,14 @@ local prometheus = grafana.prometheus;
2122    format='percentunit' ,
2223    decimalsY1=0 ,
2324    min=0 ,
24-     panel_width=12 ,
25+     panel_width=8 ,
2526  ).addTarget(
2627    common.target(cfg, metric_name, rate=true )
2728  ),
2829
29-   getrusage_cpu_user_time (
30+   getrusage_cpu_instance_user_time (
3031    cfg,
31-     title='CPU user time' ,
32+     title='CPU user time per instance ' ,
3233    description=||| 
3334      This is the average share of time 
3435      spent by instance process executing in user mode. 
@@ -43,9 +44,9 @@ local prometheus = grafana.prometheus;
4344    metric_name='tnt_cpu_user_time' ,
4445  ),
4546
46-   getrusage_cpu_system_time (
47+   getrusage_cpu_instance_system_time (
4748    cfg,
48-     title='CPU system time' ,
49+     title='CPU system time per instance ' ,
4950    description=||| 
5051      This is the average share of time 
5152      spent by instance process executing in kernel mode. 
@@ -60,6 +61,225 @@ local prometheus = grafana.prometheus;
6061    metric_name='tnt_cpu_system_time' ,
6162  ),
6263
64+   // -------------------------------------------------------------------------- 
65+   local  getrusage_cpu_total_percentage_graph(
66+     cfg, title, description,
67+   ) = common.default_graph(
68+     cfg,
69+     title=title,
70+     description=description,
71+     format='percentunit' ,
72+     decimalsY1=0 ,
73+     min=0 ,
74+     panel_width=8 ,
75+   ).addTarget(
76+     if  cfg.type == variable.datasource_type.prometheus then 
77+       prometheus.target(
78+         expr=std.format (
79+           ||| 
80+             rate(%(metrics_prefix)stnt_cpu_user_time{%(filters)s}[$__rate_interval]) + 
81+             rate(%(metrics_prefix)stnt_cpu_system_time{%(filters)s}[$__rate_interval]) 
82+ ||| ,
83+           {
84+             metrics_prefix:  cfg.metrics_prefix,
85+             filters:  common.prometheus_query_filters(cfg.filters),
86+           }
87+         ),
88+         legendFormat='{{alias}}' 
89+       )
90+     else  if  cfg.type == variable.datasource_type.influxdb then 
91+       influxdb.target(
92+         rawQuery=true ,
93+         query=std.format(||| 
94+           SELECT non_negative_derivative(SUM("value"), 1s) 
95+           FROM %(measurement_with_policy)s 
96+           WHERE (("metric_name" = '%(metric_user_time)s' OR "metric_name" = '%(metric_system_time)s') AND %(filters)s) 
97+           AND $timeFilter 
98+           GROUP BY time($__interval), "label_pairs_alias" fill(none) 
99+ ||| , {
100+           measurement_with_policy:  std.format ('%(policy_prefix)s"%(measurement)s"' , {
101+             policy_prefix:  if  cfg.policy == 'default'  then  ''  else  std.format ('"%(policy)s".' , cfg.policy),
102+             measurement:  cfg.measurement,
103+           }),
104+           metric_user_time:  cfg.metrics_prefix + 'tnt_cpu_user_time' ,
105+           metric_system_time:  cfg.metrics_prefix + 'tnt_cpu_system_time' ,
106+           filters:  common.influxdb_query_filters(cfg.filters),
107+         }),
108+         alias='$tag_label_pairs_alias' ,
109+       )
110+   ),
111+ 
112+   getrusage_cpu_instance_total_time(
113+     cfg,
114+     title='CPU total time per instance' ,
115+     description=||| 
116+       This is the average share of time spent 
117+       by instance process executing. 
118+ 
119+       Panel minimal requirements: metrics 0.8.0. 
120+ ||| ,
121+   ):: getrusage_cpu_total_percentage_graph(
122+     cfg=cfg,
123+     title=title,
124+     description=description,
125+   ),
126+ 
127+   // -------------------------------------------------------------------------- 
128+   local  getrusage_cpu_common_percentage_graph(
129+     cfg,
130+     title,
131+     description,
132+     prometheus_expr,
133+     prometheus_legend,
134+     influx_query,
135+     influx_alias,
136+   ) = common.default_graph(
137+     cfg,
138+     title=title,
139+     description=description,
140+     format='percentunit' ,
141+     decimalsY1=0 ,
142+     min=0 ,
143+     panel_width=8 ,
144+   ).addTarget(
145+     if  cfg.type == variable.datasource_type.prometheus then 
146+       prometheus.target(
147+         expr=prometheus_expr,
148+         legendFormat=prometheus_legend,
149+       )
150+     else  if  cfg.type == variable.datasource_type.influxdb then 
151+       influxdb.target(
152+         rawQuery=true ,
153+         query=influx_query,
154+         alias=influx_alias,
155+       )
156+   ),
157+ 
158+   getrusage_cpu_total_time(
159+     cfg,
160+     title='CPU total time per cluster' ,
161+     description=||| 
162+       This is the total share of time spent 
163+       by each cluster process executing. 
164+ 
165+       Panel minimal requirements: metrics 0.8.0. 
166+ ||| ,
167+   ):: getrusage_cpu_common_percentage_graph(
168+     cfg=cfg,
169+     title=title,
170+     description=description,
171+     prometheus_expr=std.format (
172+       ||| 
173+         sum(rate(%(metrics_prefix)stnt_cpu_user_time{%(filters)s}[$__rate_interval])) + 
174+         sum(rate(%(metrics_prefix)stnt_cpu_system_time{%(filters)s}[$__rate_interval])) 
175+ ||| ,
176+       {
177+         metrics_prefix:  cfg.metrics_prefix,
178+         filters:  common.prometheus_query_filters(common.remove_field(cfg.filters, 'alias' )),
179+       }
180+     ),
181+     prometheus_legend=title,
182+     influx_query=std.format(||| 
183+       SELECT non_negative_derivative(SUM("value"), 1s) 
184+       FROM %(measurement_with_policy)s 
185+       WHERE (("metric_name" = '%(metric_user_time)s' OR "metric_name" = '%(metric_system_time)s') AND %(filters)s) 
186+       AND $timeFilter 
187+       GROUP BY time($__interval) 
188+ ||| , {
189+       measurement_with_policy:  std.format ('%(policy_prefix)s"%(measurement)s"' , {
190+         policy_prefix:  if  cfg.policy == 'default'  then  ''  else  std.format ('"%(policy)s".' , cfg.policy),
191+         measurement:  cfg.measurement,
192+       }),
193+       metric_user_time:  cfg.metrics_prefix + 'tnt_cpu_user_time' ,
194+       metric_system_time:  cfg.metrics_prefix + 'tnt_cpu_system_time' ,
195+       filters:  if  common.influxdb_query_filters(common.remove_field(cfg.filters, 'label_pairs_alias' )) != '' 
196+       then  common.influxdb_query_filters(common.remove_field(cfg.filters, 'label_pairs_alias' ))
197+       else  'true' ,
198+     }),
199+     influx_alias=title
200+   ),
201+ 
202+   getrusage_cpu_total_user_time(
203+     cfg,
204+     title='CPU total user time per cluster' ,
205+     description=||| 
206+       This is the total share of time 
207+       spent in user mode per cluster. 
208+ 
209+       Panel minimal requirements: metrics 0.8.0. 
210+ ||| ,
211+   ):: getrusage_cpu_common_percentage_graph(
212+     cfg=cfg,
213+     title=title,
214+     description=description,
215+     prometheus_expr=std.format (
216+       ||| 
217+         sum(rate(%(metrics_prefix)stnt_cpu_user_time{%(filters)s}[$__rate_interval])) 
218+ ||| ,
219+       {
220+         metrics_prefix:  cfg.metrics_prefix,
221+         filters:  common.prometheus_query_filters(common.remove_field(cfg.filters, 'alias' )),
222+       }
223+     ),
224+     prometheus_legend=title,
225+     influx_query=std.format(||| 
226+       SELECT non_negative_derivative(SUM("value"), 1s) 
227+       FROM %(measurement_with_policy)s 
228+       WHERE "metric_name" = '%(metric_user_time)s' AND %(filters)s 
229+       AND $timeFilter 
230+       GROUP BY time($__interval) 
231+ ||| , {
232+       measurement_with_policy:  std.format ('%(policy_prefix)s"%(measurement)s"' , {
233+         policy_prefix:  if  cfg.policy == 'default'  then  ''  else  std.format ('"%(policy)s".' , cfg.policy),
234+         measurement:  cfg.measurement,
235+       }),
236+       metric_user_time:  cfg.metrics_prefix + 'tnt_cpu_user_time' ,
237+       filters:  common.influxdb_query_filters(cfg.filters),
238+     }),
239+     influx_alias=title
240+   ),
241+ 
242+   getrusage_cpu_total_system_time(
243+     cfg,
244+     title='CPU total system time per cluster' ,
245+     description=||| 
246+       This is the total share of time 
247+       spent in system mode per cluster. 
248+ 
249+       Panel minimal requirements: metrics 0.8.0. 
250+ ||| ,
251+   ):: getrusage_cpu_common_percentage_graph(
252+     cfg=cfg,
253+     title=title,
254+     description=description,
255+     prometheus_expr=std.format (
256+       ||| 
257+         sum(rate(%(metrics_prefix)stnt_cpu_system_time{%(filters)s}[$__rate_interval])) 
258+ ||| ,
259+       {
260+         metrics_prefix:  cfg.metrics_prefix,
261+         filters:  common.prometheus_query_filters(common.remove_field(cfg.filters, 'alias' )),
262+       }
263+     ),
264+     prometheus_legend=title,
265+     influx_query=std.format(||| 
266+       SELECT non_negative_derivative(SUM("value"), 1s) 
267+       FROM %(measurement_with_policy)s 
268+       WHERE "metric_name" = '%(metric_system_time)s' AND %(filters)s 
269+       AND $timeFilter 
270+       GROUP BY time($__interval) 
271+ ||| , {
272+       measurement_with_policy:  std.format ('%(policy_prefix)s"%(measurement)s"' , {
273+         policy_prefix:  if  cfg.policy == 'default'  then  ''  else  std.format ('"%(policy)s".' , cfg.policy),
274+         measurement:  cfg.measurement,
275+       }),
276+       metric_system_time:  cfg.metrics_prefix + 'tnt_cpu_system_time' ,
277+       filters:  common.influxdb_query_filters(cfg.filters),
278+     }),
279+     influx_alias=title
280+   ),
281+ 
282+   // -------------------------------------------------------------------------- 
63283  local  procstat_thread_time_graph(
64284    cfg,
65285    title,
0 commit comments