Skip to content

Commit 458e5d1

Browse files
committed
prometheus_text_format: Optimize binary construction
The formatting callback for a registry can build each metric family as a single binary in order to reduce garbage. This mainly involves passing the accumulator binary through all functions that append to it.

It's more efficient to append directly to the resulting binary than to allocate smaller binaries and then append them. For example:

    <<Blob/binary, Name/binary, "_", Suffix/binary>>.
    %% versus
    Combined = <<Name/binary, "_", Suffix/binary>>,
    <<Blob/binary, Combined/binary>>.

The first expression generates less garbage than the second.

A good example of this was the `add_brackets/1` function, which was inlined. Unfortunately, according to the compiler, inlining does not turn the second expression (above) into the first, so we pay the cost of creating a binary with brackets and then copying that into the larger blob, rather than writing the brackets and labels straight into the blob. This change manually inlines `add_brackets/1` into its caller `render_series/4`.

This change also changes some list strings into binaries. Especially for ASCII strings, binaries are _far_ more compact than lists. Lists need two words per ASCII character - one for the character and one for the tail pointer. So it's like UTF-32 but worse, basically UTF-128 on a 64-bit machine. ASCII or UTF-8 text in binaries takes one byte per character in the binary's array, plus a word or two of metadata. E.g. `<<"hello">>` allocates three words while `"hello"` allocates ten.
1 parent 0a5425f commit 458e5d1

File tree

1 file changed

+61
-78
lines changed

1 file changed

+61
-78
lines changed

src/formats/prometheus_text_format.erl

Lines changed: 61 additions & 78 deletions
Original file line numberDiff line numberDiff line change
@@ -29,13 +29,13 @@ http_request_duration_milliseconds_sum{method=\"post\"} 4350
2929
-export([content_type/0, format/0, format/1, render_labels/1, escape_label_value/1]).
3030

3131
-ifdef(TEST).
32-
-export([escape_metric_help/1, emit_mf_prologue/2, emit_mf_metrics/2]).
32+
-export([escape_metric_help/1]).
3333
-endif.
3434

3535
-include("prometheus_model.hrl").
3636

3737
-behaviour(prometheus_format).
38-
-compile({inline, [add_brackets/1, render_label_pair/1]}).
38+
-compile({inline, [render_label_pair/1]}).
3939

4040
?DOC("""
4141
Returns content type of the latest \[text format](https://bit.ly/2cxSuJP).
@@ -85,50 +85,40 @@ escape_label_value(Value) ->
8585
erlang:error({invalid_value, Value}).
8686

8787
registry_collect_callback(Fd, Registry, Collector) ->
88-
Callback = fun(MF) ->
89-
emit_mf_prologue(Fd, MF),
90-
emit_mf_metrics(Fd, MF)
88+
Callback = fun(#'MetricFamily'{name = Name0, help = Help, type = Type, metric = Metrics}) ->
89+
%% eagerly convert the name to a binary so we can copy more efficiently
90+
%% in `render_metrics/3`
91+
Name = iolist_to_binary(Name0),
92+
Prologue = <<
93+
"# TYPE ",
94+
Name/binary,
95+
" ",
96+
(string_type(Type))/binary,
97+
"\n# HELP ",
98+
Name/binary,
99+
" ",
100+
(escape_metric_help(Help))/binary,
101+
"\n"
102+
>>,
103+
%% file:write/2 is an expensive operation, as it goes through a port driver.
104+
%% Instead a large chunk of bytes is being collected here, in a
105+
%% way that triggers binary append optimization in ERTS.
106+
file:write(Fd, render_metrics(Name, Metrics, Prologue))
91107
end,
92108
prometheus_collector:collect_mf(Registry, Collector, Callback).
93109

94-
?DOC(false).
95-
-spec emit_mf_prologue(Fd :: file:fd(), prometheus_model:'MetricFamily'()) -> ok.
96-
emit_mf_prologue(Fd, #'MetricFamily'{name = Name, help = Help, type = Type}) ->
97-
Bytes = [
98-
"# TYPE ",
99-
Name,
100-
" ",
101-
string_type(Type),
102-
"\n# HELP ",
103-
Name,
104-
" ",
105-
escape_metric_help(Help),
106-
"\n"
107-
],
108-
file:write(Fd, Bytes).
110+
render_metrics(_Name, [], Bytes) ->
111+
Bytes;
112+
render_metrics(Name, [Metric | Rest], Bytes) ->
113+
render_metrics(Name, Rest, render_metric(Bytes, Name, Metric)).
109114

110-
?DOC(false).
111-
-spec emit_mf_metrics(file:fd(), prometheus_model:'MetricFamily'()) -> ok | {error, term()}.
112-
emit_mf_metrics(Fd, #'MetricFamily'{name = Name, metric = Metrics}) ->
113-
%% file:write/2 is an expensive operation, as it goes through a port driver.
114-
%% Instead a large chunk of bytes is being collected here, in a
115-
%% way that triggers binary append optimization in ERTS.
116-
Bytes = lists:foldl(
117-
fun(Metric, Blob) ->
118-
<<Blob/binary, (render_metric(Name, Metric))/binary>>
119-
end,
120-
<<>>,
121-
Metrics
122-
),
123-
file:write(Fd, Bytes).
124-
125-
render_metric(Name, #'Metric'{label = Labels, counter = #'Counter'{value = Value}}) ->
126-
render_series(Name, render_labels(Labels), Value);
127-
render_metric(Name, #'Metric'{label = Labels, gauge = #'Gauge'{value = Value}}) ->
128-
render_series(Name, render_labels(Labels), Value);
129-
render_metric(Name, #'Metric'{label = Labels, untyped = #'Untyped'{value = Value}}) ->
130-
render_series(Name, render_labels(Labels), Value);
131-
render_metric(Name, #'Metric'{
115+
render_metric(Bytes0, Name, #'Metric'{label = Labels, counter = #'Counter'{value = Value}}) ->
116+
render_series(Bytes0, Name, render_labels(Labels), Value);
117+
render_metric(Bytes0, Name, #'Metric'{label = Labels, gauge = #'Gauge'{value = Value}}) ->
118+
render_series(Bytes0, Name, render_labels(Labels), Value);
119+
render_metric(Bytes0, Name, #'Metric'{label = Labels, untyped = #'Untyped'{value = Value}}) ->
120+
render_series(Bytes0, Name, render_labels(Labels), Value);
121+
render_metric(Bytes0, Name, #'Metric'{
132122
label = Labels,
133123
summary = #'Summary'{
134124
sample_count = Count,
@@ -137,12 +127,13 @@ render_metric(Name, #'Metric'{
137127
}
138128
}) ->
139129
LString = render_labels(Labels),
140-
Bytes1 = render_series([Name, "_count"], LString, Count),
141-
Bytes2 = <<Bytes1/binary, (render_series([Name, "_sum"], LString, Sum))/binary>>,
130+
Bytes1 = render_series(Bytes0, <<Name/binary, "_count">>, LString, Count),
131+
Bytes2 = render_series(Bytes1, <<Name/binary, "_sum">>, LString, Sum),
142132
Bytes3 = lists:foldl(
143133
fun(#'Quantile'{quantile = QN, value = QV}, Blob) ->
144-
Val = render_series(
145-
[Name],
134+
render_series(
135+
Blob,
136+
Name,
146137
render_labels(
147138
[
148139
LString,
@@ -153,14 +144,13 @@ render_metric(Name, #'Metric'{
153144
]
154145
),
155146
QV
156-
),
157-
<<Blob/binary, Val/binary>>
147+
)
158148
end,
159149
Bytes2,
160150
Quantiles
161151
),
162152
Bytes3;
163-
render_metric(Name, #'Metric'{
153+
render_metric(Bytes0, Name, #'Metric'{
164154
label = Labels,
165155
histogram = #'Histogram'{
166156
sample_count = Count,
@@ -172,33 +162,34 @@ render_metric(Name, #'Metric'{
172162
LString = render_labels(Labels),
173163
Bytes1 = lists:foldl(
174164
fun(Bucket, Blob) ->
175-
<<Blob/binary, (emit_histogram_bucket(Name, LString, Bucket))/binary>>
165+
emit_histogram_bucket(Blob, Name, LString, Bucket)
176166
end,
177-
<<>>,
167+
Bytes0,
178168
Buckets
179169
),
180-
Bytes2 = <<Bytes1/binary, (render_series([Name, "_count"], LString, Count))/binary>>,
181-
Bytes3 = <<Bytes2/binary, (render_series([Name, "_sum"], LString, Sum))/binary>>,
170+
Bytes2 = render_series(Bytes1, <<Name/binary, "_count">>, LString, Count),
171+
Bytes3 = render_series(Bytes2, <<Name/binary, "_sum">>, LString, Sum),
182172
Bytes3.
183173

184-
emit_histogram_bucket(Name, LString, #'Bucket'{cumulative_count = BCount, upper_bound = BBound}) ->
174+
emit_histogram_bucket(Bytes0, Name, LString, #'Bucket'{cumulative_count = BCount, upper_bound = BBound}) ->
185175
BLValue = bound_to_label_value(BBound),
186176
render_series(
187-
[Name, "_bucket"],
177+
Bytes0,
178+
<<Name/binary, "_bucket">>,
188179
render_labels([LString, #'LabelPair'{name = "le", value = BLValue}]),
189180
BCount
190181
).
191182

192183
string_type('COUNTER') ->
193-
"counter";
184+
<<"counter">>;
194185
string_type('GAUGE') ->
195-
"gauge";
186+
<<"gauge">>;
196187
string_type('SUMMARY') ->
197-
"summary";
188+
<<"summary">>;
198189
string_type('HISTOGRAM') ->
199-
"histogram";
190+
<<"histogram">>;
200191
string_type('UNTYPED') ->
201-
"untyped".
192+
<<"untyped">>.
202193

203194
%% binary() in spec means 0 or more already rendered labels (name,
204195
%% escaped value), joined with "," in between
@@ -233,26 +224,18 @@ render_label_pair(B) when is_binary(B) ->
233224
render_label_pair(#'LabelPair'{name = Name, value = Value}) ->
234225
<<(iolist_to_binary(Name))/binary, "=\"", (escape_label_value(Value))/binary, "\"">>.
235226

236-
add_brackets(<<>>) ->
237-
<<>>;
238-
add_brackets(LString) ->
239-
<<"{", LString/binary, "}">>.
227+
render_series(Bytes, Name, <<>>, Value) ->
228+
render_value(<<Bytes/binary, Name/binary, " ">>, Value);
229+
render_series(Bytes, Name, LString, Value) ->
230+
render_value(<<Bytes/binary, Name/binary, "{", LString/binary, "} ">>, Value).
240231

241-
render_series(Name, LString, undefined) ->
242-
<<(iolist_to_binary(Name))/binary, (add_brackets(LString))/binary, " NaN\n">>;
243-
render_series(Name, LString, Value) when is_integer(Value) ->
244-
<<
245-
(iolist_to_binary(Name))/binary,
246-
(add_brackets(LString))/binary,
247-
" ",
248-
(integer_to_binary(Value))/binary,
249-
"\n"
250-
>>;
251-
render_series(Name, LString, Value) ->
232+
render_value(Bytes, undefined) ->
233+
<<Bytes/binary, "NaN\n">>;
234+
render_value(Bytes, Value) when is_integer(Value) ->
235+
<<Bytes/binary, (integer_to_binary(Value))/binary, "\n">>;
236+
render_value(Bytes, Value) ->
252237
<<
253-
(iolist_to_binary(Name))/binary,
254-
(add_brackets(LString))/binary,
255-
" ",
238+
Bytes/binary,
256239
(iolist_to_binary(io_lib:format("~p", [Value])))/binary,
257240
"\n"
258241
>>.

0 commit comments

Comments
 (0)