Skip to content

Commit 74da19f

Browse files
authored
Prometheus metrics (#1083)
* implement a Prometheus bridge The bridge reads metrics from the Prometheus registry, and converts them to our own metrics. This is currently supported for Counters, Gauges, and Summaries. Histograms are not supported.
1 parent 5ed034d commit 74da19f

File tree

13 files changed

+298
-14
lines changed

13 files changed

+298
-14
lines changed

.ci/.jenkins_framework.yml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -43,3 +43,4 @@ FRAMEWORK:
4343
- graphene-2
4444
- httpx-newest
4545
- httplib2-newest
46+
- prometheus_client-newest

docs/configuration.asciidoc

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -954,6 +954,38 @@ By setting this to `False`, tracking this metric is completely disabled, which c
954954

955955
NOTE: This feature requires APM Server and Kibana >= 7.3.
956956

957+
[float]
958+
[[config-prometheus_metrics]]
959+
==== `prometheus_metrics` (Beta)
960+
961+
[options="header"]
962+
|============
963+
| Environment | Django/Flask | Default
964+
| `ELASTIC_APM_PROMETHEUS_METRICS` | `PROMETHEUS_METRICS` | `False`
965+
|============
966+
967+
Enable/disable the tracking and collection of metrics from `prometheus_client`.
968+
969+
See <<prometheus-metricset>> for more information.
970+
971+
NOTE: This feature is currently in beta status.
972+
973+
[float]
974+
[[config-prometheus_metrics_prefix]]
975+
==== `prometheus_metrics_prefix` (Beta)
976+
977+
[options="header"]
978+
|============
979+
| Environment | Django/Flask | Default
980+
| `ELASTIC_APM_PROMETHEUS_METRICS_PREFIX` | `PROMETHEUS_METRICS_PREFIX` | `prometheus.metrics.`
981+
|============
982+
983+
A prefix to prepend to Prometheus metrics names.
984+
985+
See <<prometheus-metricset>> for more information.
986+
987+
NOTE: This feature is currently in beta status.
988+
957989
[float]
958990
[[config-central_config]]
959991
==== `central_config`

docs/metrics.asciidoc

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@ These metrics will be sent regularly to the APM Server and from there to Elastic
1111
* <<cpu-memory-metricset>>
1212
* <<transactions-metricset>>
1313
* <<breakdown-metricset>>
14+
* <<prometheus-metricset>>
1415

1516
[float]
1617
[[cpu-memory-metricset]]
@@ -178,3 +179,28 @@ You can filter and group by these dimensions:
178179
* `span.subtype`: The sub-type of the span, for example `mysql` (optional)
179180

180181
--
182+
[float]
183+
[[prometheus-metricset]]
184+
==== Prometheus metric set (beta)
185+
186+
beta[]
187+
188+
If you use https://github.com/prometheus/client_python[`prometheus_client`] to collect metrics, the agent can
189+
collect them as well and make them available in Elasticsearch.
190+
191+
The following types of metrics are supported:
192+
193+
* Counters
194+
* Gauges
195+
* Summaries
196+
197+
To use the Prometheus metric set, you have to enable it with the <<config-prometheus_metrics, `prometheus_metrics`>> configuration option.
198+
199+
All metrics collected from `prometheus_client` are prefixed with `"prometheus.metrics."`. This can be changed using the <<config-prometheus_metrics_prefix, `prometheus_metrics_prefix`>> configuration option.
200+
201+
[float]
202+
[[prometheus-metricset-beta]]
203+
===== Beta limitations
204+
* The metrics format may change without backwards compatibility in future releases.
205+
* Histograms are currently not supported.
206+

elasticapm/base.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -195,6 +195,8 @@ def __init__(self, config=None, **inline):
195195
self._metrics.register(path)
196196
if self.config.breakdown_metrics:
197197
self._metrics.register("elasticapm.metrics.sets.breakdown.BreakdownMetricSet")
198+
if self.config.prometheus_metrics:
199+
self._metrics.register("elasticapm.metrics.sets.prometheus.PrometheusMetrics")
198200
self._thread_managers["metrics"] = self._metrics
199201
compat.atexit_register(self.close)
200202
if self.config.central_config:

elasticapm/conf/__init__.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -559,6 +559,8 @@ class Config(_ConfigBase):
559559
default=30000,
560560
)
561561
breakdown_metrics = _BoolConfigValue("BREAKDOWN_METRICS", default=True)
562+
prometheus_metrics = _BoolConfigValue("PROMETHEUS_METRICS", default=False)
563+
prometheus_metrics_prefix = _ConfigValue("PROMETHEUS_METRICS_PREFIX", default="prometheus.metrics.")
562564
disable_metrics = _ListConfigValue("DISABLE_METRICS", type=starmatch_to_regex, default=[])
563565
central_config = _BoolConfigValue("CENTRAL_CONFIG", default=True)
564566
api_request_size = _ConfigValue("API_REQUEST_SIZE", type=int, validators=[size_validator], default=768 * 1024)

elasticapm/metrics/base_metrics.py

Lines changed: 26 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -142,17 +142,18 @@ def gauge(self, name, reset_on_collect=False, **labels):
142142
"""
143143
return self._metric(self._gauges, Gauge, name, reset_on_collect, labels)
144144

145-
def timer(self, name, reset_on_collect=False, **labels):
145+
def timer(self, name, reset_on_collect=False, unit=None, **labels):
146146
"""
147147
Returns an existing or creates and returns a new timer
148148
:param name: name of the timer
149149
:param reset_on_collect: indicate if the timer should be reset to 0 when collecting
150+
:param unit: Unit of the observed metric
150151
:param labels: a flat key/value map of labels
151152
:return: the timer object
152153
"""
153-
return self._metric(self._timers, Timer, name, reset_on_collect, labels)
154+
return self._metric(self._timers, Timer, name, reset_on_collect, labels, unit)
154155

155-
def _metric(self, container, metric_class, name, reset_on_collect, labels):
156+
def _metric(self, container, metric_class, name, reset_on_collect, labels, unit=None):
156157
"""
157158
Returns an existing or creates and returns a metric
158159
:param container: the container for the metric
@@ -178,7 +179,7 @@ def _metric(self, container, metric_class, name, reset_on_collect, labels):
178179
)
179180
metric = noop_metric
180181
else:
181-
metric = metric_class(name, reset_on_collect=reset_on_collect)
182+
metric = metric_class(name, reset_on_collect=reset_on_collect, unit=unit)
182183
container[key] = metric
183184
return container[key]
184185

@@ -219,7 +220,10 @@ def collect(self):
219220
if t is not noop_metric:
220221
val, count = t.val
221222
if val or not t.reset_on_collect:
222-
samples[labels].update({name + ".sum.us": {"value": int(val * 1000000)}})
223+
sum_name = ".sum"
224+
if t._unit:
225+
sum_name += "." + t._unit
226+
samples[labels].update({name + sum_name: {"value": val}})
223227
samples[labels].update({name + ".count": {"value": count}})
224228
if t.reset_on_collect:
225229
t.reset()
@@ -260,11 +264,12 @@ def before_yield(self, data):
260264
class Counter(object):
261265
__slots__ = ("name", "_lock", "_initial_value", "_val", "reset_on_collect")
262266

263-
def __init__(self, name, initial_value=0, reset_on_collect=False):
267+
def __init__(self, name, initial_value=0, reset_on_collect=False, unit=None):
264268
"""
265269
Creates a new counter
266270
:param name: name of the counter
267271
:param initial_value: initial value of the counter, defaults to 0
272+
:param unit: unit of the observed counter. Unused for counters
268273
"""
269274
self.name = name
270275
self._lock = threading.Lock()
@@ -305,14 +310,20 @@ def val(self):
305310
"""Returns the current value of the counter"""
306311
return self._val
307312

313+
@val.setter
314+
def val(self, value):
315+
with self._lock:
316+
self._val = value
317+
308318

309319
class Gauge(object):
310320
__slots__ = ("name", "_val", "reset_on_collect")
311321

312-
def __init__(self, name, reset_on_collect=False):
322+
def __init__(self, name, reset_on_collect=False, unit=None):
313323
"""
314324
Creates a new gauge
315325
:param name: label of the gauge
326+
:param unit of the observed gauge. Unused for gauges
316327
"""
317328
self.name = name
318329
self._val = None
@@ -331,12 +342,13 @@ def reset(self):
331342

332343

333344
class Timer(object):
334-
__slots__ = ("name", "_val", "_count", "_lock", "reset_on_collect")
345+
__slots__ = ("name", "_val", "_count", "_lock", "_unit", "reset_on_collect")
335346

336-
def __init__(self, name=None, reset_on_collect=False):
347+
def __init__(self, name=None, reset_on_collect=False, unit=None):
337348
self.name = name
338349
self._val = 0
339350
self._count = 0
351+
self._unit = unit
340352
self._lock = threading.Lock()
341353
self.reset_on_collect = reset_on_collect
342354

@@ -355,6 +367,11 @@ def val(self):
355367
with self._lock:
356368
return self._val, self._count
357369

370+
@val.setter
371+
def val(self, value):
372+
with self._lock:
373+
self._val, self._count = value
374+
358375

359376
class NoopMetric(object):
360377
"""
Lines changed: 89 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,89 @@
1+
# BSD 3-Clause License
2+
#
3+
# Copyright (c) 2020, Elasticsearch BV
4+
# All rights reserved.
5+
#
6+
# Redistribution and use in source and binary forms, with or without
7+
# modification, are permitted provided that the following conditions are met:
8+
#
9+
# * Redistributions of source code must retain the above copyright notice, this
10+
# list of conditions and the following disclaimer.
11+
#
12+
# * Redistributions in binary form must reproduce the above copyright notice,
13+
# this list of conditions and the following disclaimer in the documentation
14+
# and/or other materials provided with the distribution.
15+
#
16+
# * Neither the name of the copyright holder nor the names of its
17+
# contributors may be used to endorse or promote products derived from
18+
# this software without specific prior written permission.
19+
#
20+
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21+
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22+
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
23+
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
24+
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25+
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
26+
# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
27+
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
28+
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29+
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30+
from __future__ import absolute_import
31+
32+
import itertools
33+
34+
import prometheus_client
35+
36+
from elasticapm.metrics.base_metrics import MetricsSet
37+
38+
39+
class PrometheusMetrics(MetricsSet):
40+
def __init__(self, registry):
41+
super(PrometheusMetrics, self).__init__(registry)
42+
self._prometheus_registry = prometheus_client.REGISTRY
43+
44+
def before_collect(self):
45+
for metric in self._prometheus_registry.collect():
46+
metric_type = self.METRIC_MAP.get(metric.type, None)
47+
if not metric_type:
48+
continue
49+
metric_type(self, metric.name, metric.samples)
50+
51+
def _prom_counter_handler(self, name, samples):
52+
# Counters can be converted 1:1 from Prometheus to our
53+
# format. Each pair of samples represents a distinct labelset for a
54+
# given name. The pair consists of the value, and a "created" timestamp.
55+
# We only use the former.
56+
for total_sample, _ in grouper(samples, 2):
57+
self.counter(
58+
self._registry.client.config.prometheus_metrics_prefix + name, **total_sample.labels
59+
).val = total_sample.value
60+
61+
def _prom_gauge_handler(self, name, samples):
62+
# Counters can be converted 1:1 from Prometheus to our
63+
# format. Each sample represents a distinct labelset for a
64+
# given name
65+
for sample in samples:
66+
self.gauge(
67+
self._registry.client.config.prometheus_metrics_prefix + name, **sample.labels
68+
).val = sample.value
69+
70+
def _prom_summary_handler(self, name, samples):
71+
# Prometheus Summaries are analogous to our Timers, having
72+
# a count and a sum. A prometheus summary is represented by
73+
# three values. The list of samples for a given name can be
74+
# grouped into 3-pairs of (count, sum, creation_timestamp).
75+
# Each 3-pair represents a labelset.
76+
for count_sample, sum_sample, _ in grouper(samples, 3):
77+
self.timer(self._registry.client.config.prometheus_metrics_prefix + name, **count_sample.labels).val = (
78+
sum_sample.value,
79+
count_sample.value,
80+
)
81+
82+
METRIC_MAP = {"counter": _prom_counter_handler, "gauge": _prom_gauge_handler, "summary": _prom_summary_handler}
83+
84+
85+
def grouper(iterable, n, fillvalue=None):
86+
"""Collect data into fixed-length chunks or blocks"""
87+
# grouper('ABCDEFG', 3, 'x') --> ABC DEF Gxx"
88+
args = [iter(iterable)] * n
89+
return itertools.zip_longest(*args, fillvalue=fillvalue)

elasticapm/traces.py

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -165,8 +165,9 @@ def end(self, skip_frames=0, duration=None):
165165
self._transaction_metrics.timer(
166166
"transaction.duration",
167167
reset_on_collect=True,
168+
unit="us",
168169
**{"transaction.name": self.name, "transaction.type": self.transaction_type}
169-
).update(self.duration)
170+
).update(int(self.duration * 1000000))
170171
if self._breakdown:
171172
for (span_type, span_subtype), timer in compat.iteritems(self._span_timers):
172173
labels = {
@@ -176,15 +177,19 @@ def end(self, skip_frames=0, duration=None):
176177
}
177178
if span_subtype:
178179
labels["span.subtype"] = span_subtype
179-
self._breakdown.timer("span.self_time", reset_on_collect=True, **labels).update(*timer.val)
180+
val = timer.val
181+
self._breakdown.timer("span.self_time", reset_on_collect=True, unit="us", **labels).update(
182+
int(val[0] * 1000000), val[1]
183+
)
180184
labels = {"transaction.name": self.name, "transaction.type": self.transaction_type}
181185
if self.is_sampled:
182186
self._breakdown.counter("transaction.breakdown.count", reset_on_collect=True, **labels).inc()
183187
self._breakdown.timer(
184188
"span.self_time",
185189
reset_on_collect=True,
190+
unit="us",
186191
**{"span.type": "app", "transaction.name": self.name, "transaction.type": self.transaction_type}
187-
).update(self.duration - self._child_durations.duration)
192+
).update(int((self.duration - self._child_durations.duration) * 1000000))
188193

189194
def _begin_span(
190195
self,

setup.cfg

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -148,6 +148,7 @@ markers =
148148
starlette
149149
graphene
150150
httpx
151+
prometheus_client
151152

152153
[isort]
153154
line_length=120

tests/metrics/base_tests.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -150,8 +150,8 @@ def test_metrics_not_collected_if_zero_and_reset(elasticapm_client):
150150
resetting_counter = m.counter("resetting_counter", reset_on_collect=True)
151151
gauge = m.gauge("gauge", reset_on_collect=False)
152152
resetting_gauge = m.gauge("resetting_gauge", reset_on_collect=True)
153-
timer = m.timer("timer", reset_on_collect=False)
154-
resetting_timer = m.timer("resetting_timer", reset_on_collect=True)
153+
timer = m.timer("timer", reset_on_collect=False, unit="us")
154+
resetting_timer = m.timer("resetting_timer", reset_on_collect=True, unit="us")
155155

156156
counter.inc(), resetting_counter.inc()
157157
gauge.val = 5

0 commit comments

Comments
 (0)