Skip to content

Commit 8f05ef9

Browse files
[part 8] refactor!(telemetry): rework metrics API (#206)
Redesign the metrics API to eliminate the need for pre-registering a list of metrics. Previously, the API relied on a pull-based approach where metrics had to be registered during the construction of the telemetry module. The module would then periodically retrieve values from the registered metrics. While functional, this design was cumbersome—especially for metrics involving dynamic tags. This refactoring introduces a push-based mechanism. Calls to `rate`, `counter`, or `distribution` now directly send data to the telemetry module, removing the need for upfront registration. This approach simplifies usage and improves flexibility. * simplify test and send metrics in app-closing * add counter test * fix typo * fix json equality * fix json ordering 2 * fix windows and formating * fix windows 2 * Apply suggestions from code review Co-authored-by: pablomartinezbernardo <[email protected]> --------- Co-authored-by: pablomartinezbernardo <[email protected]>
1 parent dca35bd commit 8f05ef9

File tree

19 files changed

+1811
-577
lines changed

19 files changed

+1811
-577
lines changed

BUILD.bazel

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,13 @@
11
cc_library(
22
name = "dd_trace_cpp",
33
srcs = [
4+
"src/datadog/common/hash.cpp",
45
"src/datadog/telemetry/configuration.cpp",
5-
"src/datadog/telemetry/metrics.cpp",
66
"src/datadog/telemetry/log.h",
77
"src/datadog/telemetry/telemetry.cpp",
88
"src/datadog/telemetry/telemetry_impl.h",
99
"src/datadog/telemetry/telemetry_impl.cpp",
10+
"src/datadog/telemetry/metric_context.h",
1011
"src/datadog/baggage.cpp",
1112
"src/datadog/base64.cpp",
1213
"src/datadog/cerr_logger.cpp",
@@ -41,6 +42,7 @@ cc_library(
4142
"src/datadog/string_util.cpp",
4243
"src/datadog/tag_propagation.cpp",
4344
"src/datadog/tags.cpp",
45+
"src/datadog/telemetry_metrics.cpp",
4446
"src/datadog/threaded_event_scheduler.cpp",
4547
"src/datadog/tracer_config.cpp",
4648
"src/datadog/tracer.cpp",
@@ -50,6 +52,7 @@ cc_library(
5052
"src/datadog/trace_segment.cpp",
5153
"src/datadog/version.cpp",
5254
"src/datadog/w3c_propagation.cpp",
55+
"src/datadog/common/hash.h",
5356
"src/datadog/base64.h",
5457
"src/datadog/config_manager.h",
5558
"src/datadog/collector_response.h",
@@ -74,6 +77,7 @@ cc_library(
7477
"src/datadog/string_util.h",
7578
"src/datadog/tag_propagation.h",
7679
"src/datadog/tags.h",
80+
"src/datadog/telemetry_metrics.h",
7781
"src/datadog/threaded_event_scheduler.h",
7882
"src/datadog/trace_sampler.h",
7983
"src/datadog/w3c_propagation.h",

CMakeLists.txt

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -106,8 +106,8 @@ target_sources(dd_trace_cpp-objects
106106
BASE_DIRS include
107107
FILES ${public_header_files}
108108
PRIVATE
109+
src/datadog/common/hash.cpp
109110
src/datadog/telemetry/configuration.cpp
110-
src/datadog/telemetry/metrics.cpp
111111
src/datadog/telemetry/telemetry.cpp
112112
src/datadog/telemetry/telemetry_impl.cpp
113113
src/datadog/baggage.cpp
@@ -150,6 +150,7 @@ target_sources(dd_trace_cpp-objects
150150
src/datadog/trace_sampler_config.cpp
151151
src/datadog/trace_sampler.cpp
152152
src/datadog/trace_segment.cpp
153+
src/datadog/telemetry_metrics.cpp
153154
src/datadog/version.cpp
154155
src/datadog/w3c_propagation.cpp
155156
)
Lines changed: 22 additions & 134 deletions
Original file line numberDiff line numberDiff line change
@@ -1,146 +1,34 @@
11
#pragma once
22

3-
// This component provides an interface, `Metric`, and specific classes for
4-
// Counter and Gauge metrics. A metric has a name, type, and set of key:value
5-
// tags associated with it. Metrics can be general to APM or language-specific.
6-
// General metrics have `common` set to `true`, and language-specific metrics
7-
// have `common` set to `false`.
8-
9-
#include <atomic>
103
#include <string>
11-
#include <vector>
124

135
namespace datadog {
146
namespace telemetry {
157

16-
class Metric {
17-
// The name of the metric that will be published. A transformation occurs
18-
// based on the name and whether it is "common" or "language-specific" when it
19-
// is recorded.
20-
std::string name_;
21-
// The type of the metric. This will currently be count or gauge.
22-
std::string type_;
23-
// Namespace of the metric.
24-
std::string scope_;
25-
// Tags associated with this specific instance of the metric.
26-
std::vector<std::string> tags_;
27-
// This affects the transformation of the metric name, where it can be a
28-
// common telemetry metric, or a language-specific metric that is prefixed
29-
// with the language name.
30-
bool common_;
31-
32-
protected:
33-
std::atomic<uint64_t> value_ = 0;
34-
Metric(std::string name, std::string type, std::string scope,
35-
std::vector<std::string> tags, bool common);
36-
37-
Metric(Metric&& rhs)
38-
: name_(std::move(rhs.name_)),
39-
type_(std::move(rhs.type_)),
40-
scope_(std::move(rhs.scope_)),
41-
tags_(std::move(rhs.tags_)) {
42-
rhs.value_.store(value_.exchange(rhs.value_));
43-
}
44-
45-
Metric& operator=(Metric&& rhs) {
46-
if (&rhs != this) {
47-
std::swap(name_, rhs.name_);
48-
std::swap(type_, rhs.type_);
49-
std::swap(scope_, rhs.scope_);
50-
std::swap(tags_, rhs.tags_);
51-
rhs.value_.store(value_.exchange(rhs.value_));
52-
}
53-
return *this;
54-
}
55-
56-
public:
57-
// Accessors for name, type, tags, common and capture_and_reset_value are used
58-
// when producing the JSON message for reporting metrics.
59-
std::string name();
60-
std::string type();
61-
std::string scope();
62-
std::vector<std::string> tags();
63-
bool common();
64-
uint64_t value();
65-
uint64_t capture_and_reset_value();
66-
};
67-
68-
// A count metric is used for measuring activity, and has methods for adding a
69-
// number of actions, or incrementing the current number of actions by 1.
70-
class CounterMetric : public Metric {
71-
public:
72-
CounterMetric(std::string name, std::string scope,
73-
std::vector<std::string> tags, bool common);
74-
void inc();
75-
void add(uint64_t amount);
8+
namespace details {
9+
enum class MetricType : char { counter, rate, distribution };
10+
}
11+
12+
/// TODO: pre-compute hash?
13+
template <details::MetricType T>
14+
struct Metric final {
15+
/// The type of the metric.
16+
static constexpr details::MetricType type = T;
17+
/// The name of the metric that will be published. A transformation occurs
18+
/// based on the name and whether it is "common" or "language-specific" when
19+
/// it is recorded.
20+
std::string name;
21+
/// Namespace of the metric.
22+
std::string scope;
23+
/// This affects the transformation of the metric name, where it can be a
24+
/// common telemetry metric, or a language-specific metric that is prefixed
25+
/// with the language name.
26+
bool common;
7627
};
7728

78-
// A gauge metric is used for measuring state, and mas methods to set the
79-
// current state, add or subtract from it, or increment/decrement the current
80-
// state by 1.
81-
class GaugeMetric : public Metric {
82-
public:
83-
GaugeMetric(std::string name, std::string scope,
84-
std::vector<std::string> tags, bool common);
85-
void set(uint64_t value);
86-
void inc();
87-
void add(uint64_t amount);
88-
void dec();
89-
void sub(uint64_t amount);
90-
};
91-
92-
// This structure contains all the metrics that are exposed by tracer
93-
// telemetry.
94-
struct DefaultMetrics {
95-
struct {
96-
telemetry::CounterMetric spans_created = {
97-
"spans_created", "tracers", {}, true};
98-
telemetry::CounterMetric spans_finished = {
99-
"spans_finished", "tracers", {}, true};
100-
101-
telemetry::CounterMetric trace_segments_created_new = {
102-
"trace_segments_created", "tracers", {"new_continued:new"}, true};
103-
telemetry::CounterMetric trace_segments_created_continued = {
104-
"trace_segments_created", "tracers", {"new_continued:continued"}, true};
105-
telemetry::CounterMetric trace_segments_closed = {
106-
"trace_segments_closed", "tracers", {}, true};
107-
telemetry::CounterMetric baggage_items_exceeded = {
108-
"context_header.truncated",
109-
"tracers",
110-
{{"truncation_reason:baggage_item_count_exceeded"}},
111-
true,
112-
};
113-
telemetry::CounterMetric baggage_bytes_exceeded = {
114-
"context_header.truncated",
115-
"tracers",
116-
{{"truncation_reason:baggage_byte_count_exceeded"}},
117-
true,
118-
};
119-
} tracer;
120-
struct {
121-
telemetry::CounterMetric requests = {
122-
"trace_api.requests", "tracers", {}, true};
123-
124-
telemetry::CounterMetric responses_1xx = {
125-
"trace_api.responses", "tracers", {"status_code:1xx"}, true};
126-
telemetry::CounterMetric responses_2xx = {
127-
"trace_api.responses", "tracers", {"status_code:2xx"}, true};
128-
telemetry::CounterMetric responses_3xx = {
129-
"trace_api.responses", "tracers", {"status_code:3xx"}, true};
130-
telemetry::CounterMetric responses_4xx = {
131-
"trace_api.responses", "tracers", {"status_code:4xx"}, true};
132-
telemetry::CounterMetric responses_5xx = {
133-
"trace_api.responses", "tracers", {"status_code:5xx"}, true};
134-
135-
telemetry::CounterMetric errors_timeout = {
136-
"trace_api.errors", "tracers", {"type:timeout"}, true};
137-
telemetry::CounterMetric errors_network = {
138-
"trace_api.errors", "tracers", {"type:network"}, true};
139-
telemetry::CounterMetric errors_status_code = {
140-
"trace_api.errors", "tracers", {"type:status_code"}, true};
141-
142-
} trace_api;
143-
};
29+
using Counter = Metric<details::MetricType::counter>;
30+
using Rate = Metric<details::MetricType::rate>;
31+
using Distribution = Metric<details::MetricType::distribution>;
14432

14533
} // namespace telemetry
14634
} // namespace datadog

include/datadog/telemetry/telemetry.h

Lines changed: 85 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,6 @@ namespace datadog::telemetry {
3131
void init(FinalizedConfiguration configuration,
3232
std::shared_ptr<tracing::Logger> logger,
3333
std::shared_ptr<tracing::HTTPClient> client,
34-
std::vector<std::shared_ptr<Metric>> metrics,
3534
std::shared_ptr<tracing::EventScheduler> event_scheduler,
3635
tracing::HTTPClient::URL agent_url,
3736
tracing::Clock clock = tracing::default_clock);
@@ -58,10 +57,6 @@ void send_configuration_change();
5857
void capture_configuration_change(
5958
const std::vector<tracing::ConfigMetadata>& new_configuration);
6059

61-
/// Provides access to the telemetry metrics for updating the values.
62-
/// This value should not be stored.
63-
DefaultMetrics& metrics();
64-
6560
/// Report internal warning message to Datadog.
6661
///
6762
/// @param message The warning message to log.
@@ -78,4 +73,89 @@ void report_error_log(std::string message);
7873
/// @param stacktrace Stacktrace leading to the error.
7974
void report_error_log(std::string message, std::string stacktrace);
8075

76+
/// The `counter` namespace provides functions to track values.
77+
/// Counters can be useful for tracking the total number of an event occurring
78+
/// in one time interval. For example, the number of requests, errors or jobs
79+
/// processed every 10 seconds.
80+
namespace counter {
81+
82+
/// Increments the specified counter by 1.
83+
///
84+
/// @param `counter` the counter to increment.
85+
void increment(const Counter& counter);
86+
87+
/// Increments the specified counter by 1.
88+
///
89+
/// @param `counter` the counter to increment.
90+
/// @param `tags` the counter tags.
91+
void increment(const Counter& counter, const std::vector<std::string>& tags);
92+
93+
/// Decrements the specified counter by 1.
94+
///
95+
/// @param `counter` the counter to decrement.
96+
void decrement(const Counter& counter);
97+
98+
/// Decrements the specified counter by 1.
99+
///
100+
/// @param `counter` the counter to decrement.
101+
/// @param `tags` the counter tags.
102+
void decrement(const Counter& counter, const std::vector<std::string>& tags);
103+
104+
/// Sets the counter to a specific value.
105+
///
106+
/// @param `counter` the counter to update.
107+
/// @param `value` the value to assign to the counter.
108+
void set(const Counter& counter, uint64_t value);
109+
110+
/// Sets the counter to a specific value.
111+
///
112+
/// @param `counter` the counter to update.
113+
/// @param `tags` the counter tags.
114+
/// @param `value` the value to assign to the counter.
115+
void set(const Counter& counter, const std::vector<std::string>& tags,
116+
uint64_t value);
117+
118+
} // namespace counter
119+
120+
/// The `rate` namespace provides support for rate metrics.
121+
/// Rates can be useful for tracking the number of occurrences of an event in
122+
/// one time interval. For example, the number of requests per second.
123+
namespace rate {
124+
125+
/// Sets the rate to a specific value.
126+
///
127+
/// @param `rate` the rate to update.
128+
/// @param `value` the value to assign to the rate.
129+
void set(const Rate& rate, uint64_t value);
130+
131+
/// Sets the rate to a specific value.
132+
///
133+
/// @param `rate` the rate to update.
134+
/// @param `tags` the rate tags.
135+
/// @param `value` the value to assign to the rate.
136+
void set(const Rate& rate, const std::vector<std::string>&, uint64_t value);
137+
138+
} // namespace rate
139+
140+
/// The `distribution` namespace provides support for statistical distributions.
141+
/// Distributions can be useful for tracking things like response times or
142+
/// payload sizes.
143+
namespace distribution {
144+
145+
/// Adds a value to the distribution.
146+
///
147+
/// @param `distribution` the distribution to update.
148+
/// @param `value` the value to add to the distribution.
149+
void add(const Distribution& distribution, uint64_t value);
150+
151+
/// Adds a value to the distribution.
152+
///
153+
/// @param `distribution` the distribution to update.
154+
/// @param `tags` the distribution tags.
155+
/// @param `value` the value to add to the distribution.
156+
void add(const Distribution& distribution, const std::vector<std::string>& tags,
157+
uint64_t value);
158+
159+
} // namespace distribution
160+
81161
} // namespace datadog::telemetry

0 commit comments

Comments
 (0)