@@ -22,13 +22,20 @@ namespace tracing {
2222namespace {
2323
2424const StringView traces_api_path = " /v0.4/traces" ;
25+ const StringView telemetry_v2_path = " /telemetry/proxy/api/v2/apmtelemetry" ;
2526
2627HTTPClient::URL traces_endpoint (const HTTPClient::URL& agent_url) {
2728 auto traces_url = agent_url;
2829 append (traces_url.path , traces_api_path);
2930 return traces_url;
3031}
3132
33+ HTTPClient::URL telemetry_endpoint (const HTTPClient::URL& agent_url) {
34+ auto telemetry_v2_url = agent_url;
35+ append (telemetry_v2_url.path , telemetry_v2_path);
36+ return telemetry_v2_url;
37+ }
38+
3239Expected<void > msgpack_encode (
3340 std::string& destination,
3441 const std::vector<DatadogAgent::TraceChunk>& trace_chunks) {
@@ -124,24 +131,71 @@ std::variant<CollectorResponse, std::string> parse_agent_traces_response(
124131
125132} // namespace
126133
127- DatadogAgent::DatadogAgent (const FinalizedDatadogAgentConfig& config,
128- const Clock& clock,
129- const std::shared_ptr<Logger>& logger)
130- : clock_(clock),
134+ DatadogAgent::DatadogAgent (
135+ const FinalizedDatadogAgentConfig& config,
136+ const std::shared_ptr<TracerTelemetry>& tracer_telemetry,
137+ const Clock& clock, const std::shared_ptr<Logger>& logger)
138+ : tracer_telemetry_(tracer_telemetry),
139+ clock_ (clock),
131140 logger_(logger),
132141 traces_endpoint_(traces_endpoint(config.url)),
142+ telemetry_endpoint_(telemetry_endpoint(config.url)),
133143 http_client_(config.http_client),
134144 event_scheduler_(config.event_scheduler),
135145 cancel_scheduled_flush_(event_scheduler_->schedule_recurring_event (
136146 config.flush_interval, [this ]() { flush (); })),
137147 flush_interval_(config.flush_interval) {
138148 assert (logger_);
149+ assert (tracer_telemetry_);
150+ if (tracer_telemetry_->enabled ()) {
151+ // Only schedule this if telemetry is enabled.
152+ // Every 10 seconds, have the tracer telemetry capture the metrics values.
153+ // Every 60 seconds, also report those values to the datadog agent.
154+ cancel_telemetry_timer_ = event_scheduler_->schedule_recurring_event (
155+ std::chrono::seconds (10 ), [this , n = 0 ]() mutable {
156+ n++;
157+ tracer_telemetry_->capture_metrics ();
158+ if (n % 6 == 0 ) {
159+ send_heartbeat_and_telemetry ();
160+ }
161+ });
162+ // Callback for setting telemetry request headers.
163+ telemetry_set_request_headers_ = [](DictWriter& headers) {
164+ headers.set (" Content-Type" , " application/json" );
165+ };
166+ // Callback for successful telemetry HTTP requests, to examine HTTP status.
167+ telemetry_on_response_ = [logger = logger_](
168+ int response_status,
169+ const DictReader& /* response_headers*/ ,
170+ std::string response_body) {
171+ if (response_status < 200 || response_status >= 300 ) {
172+ logger->log_error ([&](auto & stream) {
173+ stream << " Unexpected telemetry response status " << response_status
174+ << " with body (starts on next line):\n "
175+ << response_body;
176+ });
177+ }
178+ };
179+ // Callback for unsuccessful telemetry HTTP requests.
180+ telemetry_on_error_ = [logger = logger_](Error error) {
181+ logger->log_error (error.with_prefix (
182+ " Error occurred during HTTP request for telemetry: " ));
183+ };
184+ }
139185}
140186
141187DatadogAgent::~DatadogAgent () {
142188 const auto deadline = clock_ ().tick + std::chrono::seconds (2 );
143189 cancel_scheduled_flush_ ();
144190 flush ();
191+ if (tracer_telemetry_->enabled ()) {
192+ // This action only needs to occur if tracer telemetry is enabled.
193+ cancel_telemetry_timer_ ();
194+ tracer_telemetry_->capture_metrics ();
195+ // The app-closing message is bundled with a message containing the final
196+ // metric values.
197+ send_app_closing ();
198+ }
145199 http_client_->drain (deadline);
146200}
147201
@@ -154,7 +208,6 @@ Expected<void> DatadogAgent::send(
154208}
155209
156210nlohmann::json DatadogAgent::config_json () const {
157- const auto & url = traces_endpoint_; // brevity
158211 const auto flush_interval_milliseconds =
159212 std::chrono::duration_cast<std::chrono::milliseconds>(flush_interval_)
160213 .count ();
@@ -163,7 +216,8 @@ nlohmann::json DatadogAgent::config_json() const {
163216 return nlohmann::json::object ({
164217 {" type" , " datadog::tracing::DatadogAgent" },
165218 {" config" , nlohmann::json::object ({
166- {" url" , (url.scheme + " ://" + url.authority + url.path )},
219+ {" traces_url" , (traces_endpoint_.scheme + " ://" + traces_endpoint_.authority + traces_endpoint_.path )},
220+ {" telemetry_url" , (telemetry_endpoint_.scheme + " ://" + telemetry_endpoint_.authority + telemetry_endpoint_.path )},
167221 {" flush_interval_milliseconds" , flush_interval_milliseconds},
168222 {" http_client" , http_client_->config_json ()},
169223 {" event_scheduler" , event_scheduler_->config_json ()},
@@ -211,10 +265,22 @@ void DatadogAgent::flush() {
211265
212266 // This is the callback for the HTTP response. It's invoked
213267 // asynchronously.
214- auto on_response = [samplers = std::move (response_handlers),
268+ auto on_response = [telemetry = tracer_telemetry_,
269+ samplers = std::move (response_handlers),
215270 logger = logger_](int response_status,
216271 const DictReader& /* response_headers*/ ,
217272 std::string response_body) {
273+ if (response_status >= 500 ) {
274+ telemetry->metrics ().trace_api .responses_5xx .inc ();
275+ } else if (response_status >= 400 ) {
276+ telemetry->metrics ().trace_api .responses_4xx .inc ();
277+ } else if (response_status >= 300 ) {
278+ telemetry->metrics ().trace_api .responses_3xx .inc ();
279+ } else if (response_status >= 200 ) {
280+ telemetry->metrics ().trace_api .responses_2xx .inc ();
281+ } else if (response_status >= 100 ) {
282+ telemetry->metrics ().trace_api .responses_1xx .inc ();
283+ }
218284 if (response_status != 200 ) {
219285 logger->log_error ([&](auto & stream) {
220286 stream << " Unexpected response status " << response_status
@@ -250,16 +316,53 @@ void DatadogAgent::flush() {
250316 // This is the callback for if something goes wrong sending the
251317 // request or retrieving the response. It's invoked
252318 // asynchronously.
253- auto on_error = [logger = logger_](Error error) {
254- logger->log_error (
255- error.with_prefix (" Error occurred during HTTP request: " ));
319+ auto on_error = [telemetry = tracer_telemetry_,
320+ logger = logger_](Error error) {
321+ telemetry->metrics ().trace_api .errors_network .inc ();
322+ logger->log_error (error.with_prefix (
323+ " Error occurred during HTTP request for submitting traces: " ));
256324 };
257325
326+ tracer_telemetry_->metrics ().trace_api .requests .inc ();
258327 auto post_result = http_client_->post (
259328 traces_endpoint_, std::move (set_request_headers), std::move (body),
260329 std::move (on_response), std::move (on_error));
261330 if (auto * error = post_result.if_error ()) {
262- logger_->log_error (*error);
331+ logger_->log_error (
332+ error->with_prefix (" Unexpected error submitting traces: " ));
333+ }
334+ }
335+
336+ void DatadogAgent::send_app_started (nlohmann::json&& tracer_config) {
337+ auto payload = tracer_telemetry_->app_started (std::move (tracer_config));
338+ auto post_result = http_client_->post (
339+ telemetry_endpoint_, telemetry_set_request_headers_, std::move (payload),
340+ telemetry_on_response_, telemetry_on_error_);
341+ if (auto * error = post_result.if_error ()) {
342+ logger_->log_error (error->with_prefix (
343+ " Unexpected error submitting telemetry app-started event: " ));
344+ }
345+ }
346+
347+ void DatadogAgent::send_heartbeat_and_telemetry () {
348+ auto payload = tracer_telemetry_->heartbeat_and_telemetry ();
349+ auto post_result = http_client_->post (
350+ telemetry_endpoint_, telemetry_set_request_headers_, std::move (payload),
351+ telemetry_on_response_, telemetry_on_error_);
352+ if (auto * error = post_result.if_error ()) {
353+ logger_->log_error (error->with_prefix (
354+ " Unexpected error submitting telemetry app-heartbeat event: " ));
355+ }
356+ }
357+
358+ void DatadogAgent::send_app_closing () {
359+ auto payload = tracer_telemetry_->app_closing ();
360+ auto post_result = http_client_->post (
361+ telemetry_endpoint_, telemetry_set_request_headers_, std::move (payload),
362+ telemetry_on_response_, telemetry_on_error_);
363+ if (auto * error = post_result.if_error ()) {
364+ logger_->log_error (error->with_prefix (
365+ " Unexpected error submitting telemetry app-closing event: " ));
263366 }
264367}
265368
0 commit comments