Skip to content

Commit 878d19e

Browse files
essenlukebakken
andauthored
Add microstate accounting collector (#98)
* Add microstate accounting collector * Add _microseconds suffix as per prometheus requirements here https://prometheus.io/docs/practices/naming/#metric-and-label-naming * Use seconds to represent total time Also convert msacc statistics to `native` time, then divide to get seconds. Part of #98 * microstate stats use perf_counter time unit, so no need to do multiple conversions Co-authored-by: Luke Bakken <[email protected]>
1 parent 6c0189f commit 878d19e

File tree

5 files changed

+414
-0
lines changed

5 files changed

+414
-0
lines changed
Lines changed: 276 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,276 @@
1+
%% @doc
2+
%% Collects microstate accounting metrics using
3+
%% <a href="http://erlang.org/doc/man/erlang.html#statistics_microstate_accounting">
4+
%% erlang:statistics(microstate_accounting)
5+
%% </a>.
6+
%%
7+
%% In order for values to increase, microstate
8+
%% accounting must be enabled. This is done by
9+
%% calling <code>erlang:system_flag(microstate_accounting, true).</code>
10+
%%
11+
%% ==Exported metrics==
12+
%% Some metrics are not available by default. They require a VM
13+
%% configured with <code>./configure --with-microstate-accounting=extra</code>.
14+
%%
15+
%% <ul>
16+
%% <li>
17+
%% `erlang_vm_msacc_aux_seconds_total'<br/>
18+
%% Type: counter.<br/>
19+
%% Total time in seconds spent handling auxiliary jobs.
20+
%% </li>
21+
%% <li>
22+
%% `erlang_vm_msacc_check_io_seconds_total'<br/>
23+
%% Type: counter.<br/>
24+
%% Total time in seconds spent checking for new I/O events.
25+
%% </li>
26+
%% <li>
27+
%% `erlang_vm_msacc_emulator_seconds_total'<br/>
28+
%% Type: counter.<br/>
29+
%% Total time in seconds spent executing Erlang processes.
30+
%% </li>
31+
%% <li>
32+
%% `erlang_vm_msacc_gc_seconds_total'<br/>
33+
%% Type: counter.<br/>
34+
%% Total time in seconds spent doing garbage collection.
35+
%% When extra states are enabled this is the time spent
36+
%% doing non-fullsweep garbage collections.
37+
%% </li>
38+
%% <li>
39+
%% `erlang_vm_msacc_other_seconds_total'<br/>
40+
%% Type: counter.<br/>
41+
%% Total time in seconds spent doing unaccounted things.
42+
%% </li>
43+
%% <li>
44+
%% `erlang_vm_msacc_port_seconds_total'<br/>
45+
%% Type: counter.<br/>
46+
%% Total time in seconds spent executing ports.
47+
%% </li>
48+
%% <li>
49+
%% `erlang_vm_msacc_sleep_seconds_total'<br/>
50+
%% Type: counter.<br/>
51+
%% Total time in seconds spent sleeping.
52+
%% </li>
53+
%% <li>
54+
%% `erlang_vm_msacc_alloc_seconds_total'<br/>
55+
%% Type: counter.<br/>
56+
%% Total time in seconds spent managing memory.
57+
%% Without extra states this time is spread out over all other states.
58+
%% </li>
59+
%% <li>
60+
%% `erlang_vm_msacc_bif_seconds_total'<br/>
61+
%% Type: counter.<br/>
62+
%% Total time in seconds spent in BIFs.
63+
%% Without extra states this time is part of the 'emulator' state.
64+
%% </li>
65+
%% <li>
66+
%% `erlang_vm_msacc_busy_wait_seconds_total'<br/>
67+
%% Type: counter.<br/>
68+
%% Total time in seconds spent busy waiting.
69+
%% Without extra states this time is part of the 'other' state.
70+
%% </li>
71+
%% <li>
72+
%% `erlang_vm_msacc_ets_seconds_total'<br/>
73+
%% Type: counter.<br/>
74+
%% Total time in seconds spent executing ETS BIFs.
75+
%% Without extra states this time is part of the 'emulator' state.
76+
%% </li>
77+
%% <li>
78+
%% `erlang_vm_msacc_gc_full_seconds_total'<br/>
79+
%% Type: counter.<br/>
80+
%% Total time in seconds spent doing fullsweep garbage collection.
81+
%% Without extra states this time is part of the 'gc' state.
82+
%% </li>
83+
%% <li>
84+
%% `erlang_vm_msacc_nif_seconds_total'<br/>
85+
%% Type: counter.<br/>
86+
%% Total time in seconds spent in NIFs.
87+
%% Without extra states this time is part of the 'emulator' state.
88+
%% </li>
89+
%% <li>
90+
%% `erlang_vm_msacc_send_seconds_total'<br/>
91+
%% Type: counter.<br/>
92+
%% Total time in seconds spent sending messages (processes only).
93+
%% Without extra states this time is part of the 'emulator' state.
94+
%% </li>
95+
%% <li>
96+
%% `erlang_vm_msacc_timers_seconds_total'<br/>
97+
%% Type: counter.<br/>
98+
%% Total time in seconds spent managing timers.
99+
%% Without extra states this time is part of the 'other' state.
100+
%% </li>
101+
%% </ul>
102+
%%
103+
%% ==Configuration==
104+
%%
105+
%% Metrics exported by this collector can be configured via
106+
%% `vm_msacc_collector_metrics' key of `prometheus' app environment.
107+
%%
108+
%% Options are the same as MSAcc_Thread_State return type from
109+
%% <a href="http://erlang.org/doc/man/erlang.html#statistics_microstate_accounting">
110+
%% erlang:statistics(microstate_accounting)
111+
%% </a> with `_seconds_total' as the suffix:
112+
%% <ul>
113+
%% <li>
114+
%% `aux_seconds_total' for `erlang_vm_msacc_aux_seconds_total'.
115+
%% </li>
116+
%% <li>
117+
%% `check_io_seconds_total' for `erlang_vm_msacc_check_io_seconds_total'.
118+
%% </li>
119+
%% <li>
120+
%% `emulator_seconds_total' for `erlang_vm_msacc_emulator_seconds_total'.
121+
%% </li>
122+
%% <li>
123+
%% `gc_seconds_total' for `erlang_vm_msacc_gc_seconds_total'.
124+
%% </li>
125+
%% <li>
126+
%% `other_seconds_total' for `erlang_vm_msacc_other_seconds_total'.
127+
%% </li>
128+
%% <li>
129+
%% `port_seconds_total' for `erlang_vm_msacc_port_seconds_total'.
130+
%% </li>
131+
%% <li>
132+
%% `sleep_seconds_total' for `erlang_vm_msacc_sleep_seconds_total'.
133+
%% </li>
134+
%% <li>
135+
%% `alloc_seconds_total' for `erlang_vm_msacc_alloc_seconds_total'.
136+
%% </li>
137+
%% <li>
138+
%% `bif_seconds_total' for `erlang_vm_msacc_bif_seconds_total'.
139+
%% </li>
140+
%% <li>
141+
%% `busy_wait_seconds_total' for `erlang_vm_msacc_busy_wait_seconds_total'.
142+
%% </li>
143+
%% <li>
144+
%% `ets_seconds_total' for `erlang_vm_msacc_ets_seconds_total'.
145+
%% </li>
146+
%% <li>
147+
%% `gc_full_seconds_total' for `erlang_vm_msacc_gc_full_seconds_total'.
148+
%% </li>
149+
%% <li>
150+
%% `nif_seconds_total' for `erlang_vm_msacc_nif_seconds_total'.
151+
%% </li>
152+
%% <li>
153+
%% `send_seconds_total' for `erlang_vm_msacc_send_seconds_total'.
154+
%% </li>
155+
%% <li>
156+
%% `timers_seconds_total' for `erlang_vm_msacc_timers_seconds_total'.
157+
%% </li>
158+
%% </ul>
159+
%%
160+
%% By default all metrics are enabled as far as Prometheus is concerned,
161+
%% although some of them may not be provided by the VM itself.
162+
%% @end
163+
-module(prometheus_vm_msacc_collector).
164+
165+
-export([deregister_cleanup/1,
166+
collect_mf/2]).
167+
168+
-import(prometheus_model_helpers, [create_mf/4]).
169+
170+
-include("prometheus.hrl").
171+
172+
-behaviour(prometheus_collector).
173+
174+
%%====================================================================
175+
%% Macros
176+
%%====================================================================
177+
178+
-define(METRIC_NAME_PREFIX, "erlang_vm_msacc_").
179+
180+
%%====================================================================
181+
%% Collector API
182+
%%====================================================================
183+
184+
%% prometheus_collector callback. Ignores its argument and always
%% returns `ok'; no cleanup work is done here.
%% @private
deregister_cleanup(_Registry) ->
  ok.
186+
187+
-spec collect_mf(_Registry, Callback) -> ok when
    _Registry :: prometheus_registry:registry(),
    Callback :: prometheus_collector:callback().
%% prometheus_collector callback. Builds every metric description via
%% `metrics/0' and hands each one whose name passes `metric_enabled/2'
%% to `Callback', in list order. The registry argument is not used.
%% @private
collect_mf(_Registry, Callback) ->
  AllMetrics = metrics(),
  Enabled = enabled_metrics(),
  lists:foreach(
    fun({Name, _Type, _Help, _Samples} = Metric) ->
        case metric_enabled(Name, Enabled) of
          true -> add_metric_family(Metric, Callback);
          false -> ok
        end
    end,
    AllMetrics),
  ok.
197+
198+
%% Turns one `{Name, Type, Help, Metrics}' description into a metric
%% family named ?METRIC_NAME(Name) and passes it to `Callback'.
add_metric_family({Name, Type, Help, Metrics}, Callback) ->
  MetricFamily = create_mf(?METRIC_NAME(Name), Help, Type, Metrics),
  Callback(MetricFamily).
200+
201+
%%====================================================================
202+
%% Private Parts
203+
%%====================================================================
204+
205+
%% Returns one `{Name, counter, Help, Samples}' tuple per microstate
%% accounting state: the base states first, then the extra states.
%% `Samples' is produced by `metric/3' from a single snapshot of
%% erlang:statistics(microstate_accounting) and the number of
%% perf_counter ticks in one second.
metrics() ->
  MSAccData = erlang:statistics(microstate_accounting),
  TicksPerSecond = erlang:convert_time_unit(1, second, perf_counter),
  %% {MSAcc state, exported metric name, help text}
  States =
    [
     %% Base states.
     {aux, aux_seconds_total,
      "Total time in seconds spent handling auxiliary jobs."},
     {check_io, check_io_seconds_total,
      "Total time in seconds spent checking for new I/O events."},
     {emulator, emulator_seconds_total,
      "Total time in seconds spent executing Erlang processes."},
     {gc, gc_seconds_total,
      "Total time in seconds spent doing garbage collection. "
      "When extra states are enabled this is the time spent "
      "doing non-fullsweep garbage collections."},
     {other, other_seconds_total,
      "Total time in seconds spent doing unaccounted things."},
     {port, port_seconds_total,
      "Total time in seconds spent executing ports."},
     {sleep, sleep_seconds_total,
      "Total time in seconds spent sleeping."},
     %% Extra states.
     {alloc, alloc_seconds_total,
      "Total time in seconds spent managing memory. "
      "Without extra states this time is spread out over all other states."},
     {bif, bif_seconds_total,
      "Total time in seconds spent in BIFs. "
      "Without extra states this time is part of the 'emulator' state."},
     {busy_wait, busy_wait_seconds_total,
      "Total time in seconds spent busy waiting. "
      "Without extra states this time is part of the 'other' state."},
     {ets, ets_seconds_total,
      "Total time in seconds spent executing ETS BIFs. "
      "Without extra states this time is part of the 'emulator' state."},
     {gc_full, gc_full_seconds_total,
      "Total time in seconds spent doing fullsweep garbage collection. "
      "Without extra states this time is part of the 'gc' state."},
     {nif, nif_seconds_total,
      "Total time in seconds spent in NIFs. "
      "Without extra states this time is part of the 'emulator' state."},
     {send, send_seconds_total,
      "Total time in seconds spent sending messages (processes only). "
      "Without extra states this time is part of the 'emulator' state."},
     {timers, timers_seconds_total,
      "Total time in seconds spent managing timers. "
      "Without extra states this time is part of the 'other' state."}
    ],
  [{Name, counter, Help, metric(State, MSAccData, TicksPerSecond)}
   || {State, Name, Help} <- States].
267+
268+
%% Builds the samples for one microstate accounting state.
%%
%% `Counter' is the state name (e.g. `aux'), `Data' is the value returned
%% by erlang:statistics(microstate_accounting) and `SecondAsPerfCounter'
%% is the number of perf_counter ticks in one second.
%%
%% Returns a list of `{[{type, Type}, {id, ID}], Seconds}' pairs, one per
%% thread entry that carries `Counter'. Entries without that counter are
%% skipped by the generator pattern, so a state the VM does not report
%% yields an empty list.
metric(_Counter, undefined, _SecondAsPerfCounter) ->
  %% The documented return type of erlang:statistics(microstate_accounting)
  %% includes `undefined'. Feeding that to the comprehension below would
  %% raise bad_generator and fail the whole collection, so report no
  %% samples instead.
  [];
metric(Counter, Data, SecondAsPerfCounter) ->
  %% Raw counters are in perf_counter units; dividing by ticks-per-second
  %% converts them to (float) seconds.
  [{[{type, Type}, {id, ID}], Value / SecondAsPerfCounter}
   || #{type := Type, id := ID, counters := #{Counter := Value}} <- Data].
271+
272+
%% Reads the `vm_msacc_collector_metrics' key of the `prometheus'
%% application environment; yields `all' when the key is not set.
enabled_metrics() ->
  case application:get_env(prometheus, vm_msacc_collector_metrics) of
    {ok, Configured} -> Configured;
    undefined -> all
  end.
274+
275+
%% True when `Name' should be exported: either the configuration is the
%% atom `all', or `Name' is a member of the configured list.
metric_enabled(_Name, all) ->
  true;
metric_enabled(Name, Metrics) ->
  lists:member(Name, Metrics).

src/prometheus_collector.erl

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -86,6 +86,7 @@
8686
prometheus_summary,
8787
prometheus_vm_dist_collector,
8888
prometheus_vm_memory_collector,
89+
prometheus_vm_msacc_collector,
8990
prometheus_vm_statistics_collector,
9091
prometheus_vm_system_info_collector]).
9192

0 commit comments

Comments
 (0)