Skip to content

Commit fd23d07

Browse files
authored
Merge pull request #62 from DataDog/cgilmour/telemetry-api
Implement telemetry API v2
2 parents e93458b + c57a039 commit fd23d07

25 files changed

+868
-53
lines changed

BUILD.bazel

Lines changed: 7 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -15,13 +15,15 @@ cc_library(
1515
"src/datadog/id_generator.cpp",
1616
"src/datadog/limiter.cpp",
1717
"src/datadog/logger.cpp",
18+
"src/datadog/metrics.cpp",
1819
"src/datadog/msgpack.cpp",
1920
"src/datadog/null_collector.cpp",
2021
"src/datadog/parse_util.cpp",
2122
"src/datadog/platform_util.cpp",
2223
"src/datadog/propagation_style.cpp",
2324
"src/datadog/random.cpp",
2425
"src/datadog/rate.cpp",
26+
"src/datadog/runtime_id.cpp",
2527
"src/datadog/span.cpp",
2628
"src/datadog/span_data.cpp",
2729
"src/datadog/span_defaults.cpp",
@@ -32,6 +34,7 @@ cc_library(
3234
"src/datadog/tags.cpp",
3335
"src/datadog/threaded_event_scheduler.cpp",
3436
"src/datadog/tracer_config.cpp",
37+
"src/datadog/tracer_telemetry.cpp",
3538
"src/datadog/tracer.cpp",
3639
"src/datadog/trace_id.cpp",
3740
"src/datadog/trace_sampler_config.cpp",
@@ -64,6 +67,7 @@ cc_library(
6467
"src/datadog/json_fwd.hpp",
6568
"src/datadog/limiter.h",
6669
"src/datadog/logger.h",
70+
"src/datadog/metrics.h",
6771
"src/datadog/msgpack.h",
6872
"src/datadog/null_collector.h",
6973
"src/datadog/optional.h",
@@ -72,6 +76,7 @@ cc_library(
7276
"src/datadog/propagation_style.h",
7377
"src/datadog/random.h",
7478
"src/datadog/rate.h",
79+
"src/datadog/runtime_id.h",
7580
"src/datadog/sampling_decision.h",
7681
"src/datadog/sampling_mechanism.h",
7782
"src/datadog/sampling_priority.h",
@@ -88,6 +93,7 @@ cc_library(
8893
"src/datadog/tags.h",
8994
"src/datadog/threaded_event_scheduler.h",
9095
"src/datadog/tracer_config.h",
96+
"src/datadog/tracer_telemetry.h",
9197
"src/datadog/tracer.h",
9298
"src/datadog/trace_id.h",
9399
"src/datadog/trace_sampler_config.h",
@@ -110,4 +116,4 @@ cc_library(
110116
"@com_google_absl//absl/strings",
111117
"@com_google_absl//absl/types:optional",
112118
],
113-
)
119+
)

CMakeLists.txt

Lines changed: 12 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -110,13 +110,15 @@ target_sources(dd_trace_cpp-objects PRIVATE
110110
src/datadog/id_generator.cpp
111111
src/datadog/limiter.cpp
112112
src/datadog/logger.cpp
113+
src/datadog/metrics.cpp
113114
src/datadog/msgpack.cpp
114115
src/datadog/null_collector.cpp
115116
src/datadog/parse_util.cpp
116117
src/datadog/platform_util.cpp
117118
src/datadog/propagation_style.cpp
118119
src/datadog/random.cpp
119120
src/datadog/rate.cpp
121+
src/datadog/runtime_id.cpp
120122
src/datadog/span.cpp
121123
src/datadog/span_data.cpp
122124
src/datadog/span_defaults.cpp
@@ -127,6 +129,7 @@ target_sources(dd_trace_cpp-objects PRIVATE
127129
src/datadog/tags.cpp
128130
src/datadog/threaded_event_scheduler.cpp
129131
src/datadog/tracer_config.cpp
132+
src/datadog/tracer_telemetry.cpp
130133
src/datadog/tracer.cpp
131134
src/datadog/trace_id.cpp
132135
src/datadog/trace_sampler_config.cpp
@@ -165,6 +168,7 @@ target_sources(dd_trace_cpp-objects PUBLIC
165168
src/datadog/json.hpp
166169
src/datadog/limiter.h
167170
src/datadog/logger.h
171+
src/datadog/metrics.h
168172
src/datadog/msgpack.h
169173
src/datadog/null_collector.h
170174
src/datadog/optional.h
@@ -173,6 +177,7 @@ target_sources(dd_trace_cpp-objects PUBLIC
173177
src/datadog/propagation_style.h
174178
src/datadog/random.h
175179
src/datadog/rate.h
180+
src/datadog/runtime_id.h
176181
src/datadog/sampling_decision.h
177182
src/datadog/sampling_mechanism.h
178183
src/datadog/sampling_priority.h
@@ -189,6 +194,7 @@ target_sources(dd_trace_cpp-objects PUBLIC
189194
src/datadog/tags.h
190195
src/datadog/threaded_event_scheduler.h
191196
src/datadog/tracer_config.h
197+
src/datadog/tracer_telemetry.h
192198
src/datadog/tracer.h
193199
src/datadog/trace_id.h
194200
src/datadog/trace_sampler_config.h
@@ -205,12 +211,12 @@ include_directories(${CMAKE_BINARY_DIR}/include)
205211

206212
# Linking this library requires libcurl and threads.
207213
find_package(Threads REQUIRED)
208-
target_link_libraries(dd_trace_cpp-objects
209-
PUBLIC
210-
${CMAKE_BINARY_DIR}/lib/libcurl.a
211-
PUBLIC
212-
Threads::Threads
213-
${COVERAGE_LIBRARIES}
214+
# Link the object library against the bundled static libcurl, the platform
# thread library, and the optional coverage / macOS framework libraries.
# Everything is PUBLIC, so one keyword covers the whole list.
target_link_libraries(dd_trace_cpp-objects
  PUBLIC
    ${CMAKE_BINARY_DIR}/lib/libcurl.a
    Threads::Threads
    ${COVERAGE_LIBRARIES}
    ${COREFOUNDATION_LIBRARY}
    ${SYSTEMCONFIGURATION_LIBRARY}
)

src/datadog/datadog_agent.cpp

Lines changed: 114 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -22,13 +22,20 @@ namespace tracing {
2222
namespace {
2323

2424
const StringView traces_api_path = "/v0.4/traces";
25+
const StringView telemetry_v2_path = "/telemetry/proxy/api/v2/apmtelemetry";
2526

2627
// Return a copy of the specified `agent_url` whose path has had
// `traces_api_path` appended to it.
HTTPClient::URL traces_endpoint(const HTTPClient::URL& agent_url) {
  HTTPClient::URL endpoint = agent_url;
  append(endpoint.path, traces_api_path);
  return endpoint;
}
3132

33+
// Return a copy of the specified `agent_url` whose path has had
// `telemetry_v2_path` appended to it.
HTTPClient::URL telemetry_endpoint(const HTTPClient::URL& agent_url) {
  HTTPClient::URL endpoint = agent_url;
  append(endpoint.path, telemetry_v2_path);
  return endpoint;
}
38+
3239
Expected<void> msgpack_encode(
3340
std::string& destination,
3441
const std::vector<DatadogAgent::TraceChunk>& trace_chunks) {
@@ -124,24 +131,71 @@ std::variant<CollectorResponse, std::string> parse_agent_traces_response(
124131

125132
} // namespace
126133

127-
DatadogAgent::DatadogAgent(const FinalizedDatadogAgentConfig& config,
128-
const Clock& clock,
129-
const std::shared_ptr<Logger>& logger)
130-
: clock_(clock),
134+
// Create a collector that submits traces, and optionally tracer telemetry, to
// endpoints derived from `config.url`.
//
// A recurring event that flushes queued traces is scheduled every
// `config.flush_interval`.  If `tracer_telemetry` is enabled, then a second
// recurring event is scheduled that captures metric values every ten seconds
// and sends them to the agent on every sixth capture.
//
// `tracer_telemetry` and `logger` must not be null.
DatadogAgent::DatadogAgent(
    const FinalizedDatadogAgentConfig& config,
    const std::shared_ptr<TracerTelemetry>& tracer_telemetry,
    const Clock& clock, const std::shared_ptr<Logger>& logger)
    : tracer_telemetry_(tracer_telemetry),
      clock_(clock),
      logger_(logger),
      traces_endpoint_(traces_endpoint(config.url)),
      telemetry_endpoint_(telemetry_endpoint(config.url)),
      http_client_(config.http_client),
      event_scheduler_(config.event_scheduler),
      cancel_scheduled_flush_(event_scheduler_->schedule_recurring_event(
          config.flush_interval, [this]() { flush(); })),
      flush_interval_(config.flush_interval) {
  assert(logger_);
  assert(tracer_telemetry_);

  // Everything below is needed only when telemetry is enabled.
  if (!tracer_telemetry_->enabled()) {
    return;
  }

  // Install the HTTP callbacks _before_ scheduling the timer below.  The
  // timer's handler hands these callbacks to the HTTP client, so they must
  // already be in place by the time the handler can possibly run.

  // Callback for setting telemetry request headers.
  telemetry_set_request_headers_ = [](DictWriter& headers) {
    headers.set("Content-Type", "application/json");
  };

  // Callback for successful telemetry HTTP requests, to examine HTTP status.
  telemetry_on_response_ = [logger = logger_](
                               int response_status,
                               const DictReader& /*response_headers*/,
                               std::string response_body) {
    if (response_status < 200 || response_status >= 300) {
      logger->log_error([&](auto& stream) {
        stream << "Unexpected telemetry response status " << response_status
               << " with body (starts on next line):\n"
               << response_body;
      });
    }
  };

  // Callback for unsuccessful telemetry HTTP requests.
  telemetry_on_error_ = [logger = logger_](Error error) {
    logger->log_error(error.with_prefix(
        "Error occurred during HTTP request for telemetry: "));
  };

  // Every 10 seconds, have the tracer telemetry capture the metrics values.
  // Every 60 seconds (i.e. every sixth tick), also report those values to the
  // datadog agent.
  cancel_telemetry_timer_ = event_scheduler_->schedule_recurring_event(
      std::chrono::seconds(10), [this, n = 0]() mutable {
        n++;
        tracer_telemetry_->capture_metrics();
        if (n % 6 == 0) {
          send_heartbeat_and_telemetry();
        }
      });
}
140186

141187
// Stop the recurring events, submit whatever is still pending, and then give
// the HTTP client until the deadline to finish outstanding requests.
DatadogAgent::~DatadogAgent() {
  // The deadline is fixed before any work is done, so time spent flushing
  // below counts against the two seconds.
  const auto drain_deadline = clock_().tick + std::chrono::seconds(2);

  cancel_scheduled_flush_();
  flush();

  const bool telemetry_enabled = tracer_telemetry_->enabled();
  if (telemetry_enabled) {
    // The telemetry timer exists only when telemetry is enabled.
    cancel_telemetry_timer_();
    // Take one last reading of the metrics; the app-closing message is
    // bundled with a message containing these final values.
    tracer_telemetry_->capture_metrics();
    send_app_closing();
  }

  http_client_->drain(drain_deadline);
}
147201

@@ -154,7 +208,6 @@ Expected<void> DatadogAgent::send(
154208
}
155209

156210
nlohmann::json DatadogAgent::config_json() const {
157-
const auto& url = traces_endpoint_; // brevity
158211
const auto flush_interval_milliseconds =
159212
std::chrono::duration_cast<std::chrono::milliseconds>(flush_interval_)
160213
.count();
@@ -163,7 +216,8 @@ nlohmann::json DatadogAgent::config_json() const {
163216
return nlohmann::json::object({
164217
{"type", "datadog::tracing::DatadogAgent"},
165218
{"config", nlohmann::json::object({
166-
{"url", (url.scheme + "://" + url.authority + url.path)},
219+
{"traces_url", (traces_endpoint_.scheme + "://" + traces_endpoint_.authority + traces_endpoint_.path)},
220+
{"telemetry_url", (telemetry_endpoint_.scheme + "://" + telemetry_endpoint_.authority + telemetry_endpoint_.path)},
167221
{"flush_interval_milliseconds", flush_interval_milliseconds},
168222
{"http_client", http_client_->config_json()},
169223
{"event_scheduler", event_scheduler_->config_json()},
@@ -211,10 +265,22 @@ void DatadogAgent::flush() {
211265

212266
// This is the callback for the HTTP response. It's invoked
213267
// asynchronously.
214-
auto on_response = [samplers = std::move(response_handlers),
268+
auto on_response = [telemetry = tracer_telemetry_,
269+
samplers = std::move(response_handlers),
215270
logger = logger_](int response_status,
216271
const DictReader& /*response_headers*/,
217272
std::string response_body) {
273+
if (response_status >= 500) {
274+
telemetry->metrics().trace_api.responses_5xx.inc();
275+
} else if (response_status >= 400) {
276+
telemetry->metrics().trace_api.responses_4xx.inc();
277+
} else if (response_status >= 300) {
278+
telemetry->metrics().trace_api.responses_3xx.inc();
279+
} else if (response_status >= 200) {
280+
telemetry->metrics().trace_api.responses_2xx.inc();
281+
} else if (response_status >= 100) {
282+
telemetry->metrics().trace_api.responses_1xx.inc();
283+
}
218284
if (response_status != 200) {
219285
logger->log_error([&](auto& stream) {
220286
stream << "Unexpected response status " << response_status
@@ -250,16 +316,53 @@ void DatadogAgent::flush() {
250316
// This is the callback for if something goes wrong sending the
251317
// request or retrieving the response. It's invoked
252318
// asynchronously.
253-
auto on_error = [logger = logger_](Error error) {
254-
logger->log_error(
255-
error.with_prefix("Error occurred during HTTP request: "));
319+
auto on_error = [telemetry = tracer_telemetry_,
320+
logger = logger_](Error error) {
321+
telemetry->metrics().trace_api.errors_network.inc();
322+
logger->log_error(error.with_prefix(
323+
"Error occurred during HTTP request for submitting traces: "));
256324
};
257325

326+
tracer_telemetry_->metrics().trace_api.requests.inc();
258327
auto post_result = http_client_->post(
259328
traces_endpoint_, std::move(set_request_headers), std::move(body),
260329
std::move(on_response), std::move(on_error));
261330
if (auto* error = post_result.if_error()) {
262-
logger_->log_error(*error);
331+
logger_->log_error(
332+
error->with_prefix("Unexpected error submitting traces: "));
333+
}
334+
}
335+
336+
// Post the payload produced by `TracerTelemetry::app_started` for the
// specified `tracer_config` to the telemetry endpoint.  If the request cannot
// be submitted, log an error.
//
// This function does nothing when telemetry is disabled.  The constructor
// installs the telemetry request/response/error callbacks only when telemetry
// is enabled, so without this guard a call made while telemetry is disabled
// would hand empty callbacks to the HTTP client.
void DatadogAgent::send_app_started(nlohmann::json&& tracer_config) {
  if (!tracer_telemetry_->enabled()) {
    return;
  }

  auto payload = tracer_telemetry_->app_started(std::move(tracer_config));
  auto post_result = http_client_->post(
      telemetry_endpoint_, telemetry_set_request_headers_, std::move(payload),
      telemetry_on_response_, telemetry_on_error_);
  if (auto* error = post_result.if_error()) {
    logger_->log_error(error->with_prefix(
        "Unexpected error submitting telemetry app-started event: "));
  }
}
346+
347+
void DatadogAgent::send_heartbeat_and_telemetry() {
348+
auto payload = tracer_telemetry_->heartbeat_and_telemetry();
349+
auto post_result = http_client_->post(
350+
telemetry_endpoint_, telemetry_set_request_headers_, std::move(payload),
351+
telemetry_on_response_, telemetry_on_error_);
352+
if (auto* error = post_result.if_error()) {
353+
logger_->log_error(error->with_prefix(
354+
"Unexpected error submitting telemetry app-heartbeat event: "));
355+
}
356+
}
357+
358+
void DatadogAgent::send_app_closing() {
359+
auto payload = tracer_telemetry_->app_closing();
360+
auto post_result = http_client_->post(
361+
telemetry_endpoint_, telemetry_set_request_headers_, std::move(payload),
362+
telemetry_on_response_, telemetry_on_error_);
363+
if (auto* error = post_result.if_error()) {
364+
logger_->log_error(error->with_prefix(
365+
"Unexpected error submitting telemetry app-closing event: "));
263366
}
264367
}
265368

src/datadog/datadog_agent.h

Lines changed: 15 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,8 @@
1414
#include "collector.h"
1515
#include "event_scheduler.h"
1616
#include "http_client.h"
17+
#include "metrics.h"
18+
#include "tracer_telemetry.h"
1719

1820
namespace datadog {
1921
namespace tracing {
@@ -32,26 +34,38 @@ class DatadogAgent : public Collector {
3234

3335
private:
3436
std::mutex mutex_;
37+
std::shared_ptr<TracerTelemetry> tracer_telemetry_;
3538
Clock clock_;
3639
std::shared_ptr<Logger> logger_;
3740
std::vector<TraceChunk> trace_chunks_;
3841
HTTPClient::URL traces_endpoint_;
42+
HTTPClient::URL telemetry_endpoint_;
3943
std::shared_ptr<HTTPClient> http_client_;
4044
std::shared_ptr<EventScheduler> event_scheduler_;
4145
EventScheduler::Cancel cancel_scheduled_flush_;
46+
EventScheduler::Cancel cancel_telemetry_timer_;
4247
std::chrono::steady_clock::duration flush_interval_;
48+
// Callbacks for submitting telemetry data
49+
HTTPClient::HeadersSetter telemetry_set_request_headers_;
50+
HTTPClient::ResponseHandler telemetry_on_response_;
51+
HTTPClient::ErrorHandler telemetry_on_error_;
4352

4453
void flush();
54+
void send_heartbeat_and_telemetry();
55+
void send_app_closing();
4556

4657
public:
47-
DatadogAgent(const FinalizedDatadogAgentConfig&, const Clock& clock,
58+
DatadogAgent(const FinalizedDatadogAgentConfig&,
59+
const std::shared_ptr<TracerTelemetry>&, const Clock& clock,
4860
const std::shared_ptr<Logger>&);
4961
~DatadogAgent();
5062

5163
Expected<void> send(
5264
std::vector<std::unique_ptr<SpanData>>&& spans,
5365
const std::shared_ptr<TraceSampler>& response_handler) override;
5466

67+
void send_app_started(nlohmann::json&& tracer_config);
68+
5569
nlohmann::json config_json() const override;
5670
};
5771

src/datadog/environment.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@ namespace environment {
2727
#define LIST_ENVIRONMENT_VARIABLES(MACRO) \
2828
MACRO(DD_AGENT_HOST) \
2929
MACRO(DD_ENV) \
30+
MACRO(DD_INSTRUMENTATION_TELEMETRY_ENABLED) \
3031
MACRO(DD_PROPAGATION_STYLE_EXTRACT) \
3132
MACRO(DD_PROPAGATION_STYLE_INJECT) \
3233
MACRO(DD_TRACE_PROPAGATION_STYLE_EXTRACT) \

0 commit comments

Comments
 (0)