@@ -17,6 +17,40 @@ using namespace datadog::tracing;
1717using namespace std ::chrono_literals;
1818
1919namespace datadog ::telemetry {
20+ namespace internal_metrics {
21+
22+ /// The number of logs created with a given log level. Useful for calculating
23+ /// impact for other features (automatic sending of logs). The `level` tag
24+ /// should be one of `debug`, `info`, `warn`, `error`, `critical`.
25+ const telemetry::Counter logs_created{" logs_created" , " general" , true };
26+
27+ /// The number of requests to the telemetry API endpoint that errored,
28+ /// tagged by the error type (e.g. `type:timeout`, `type:network`,
29+ /// `type:status_code`) and endpoint (`endpoint:agent`, `endpoint:agentless`).
30+ const telemetry::Counter errors{" telemetry_api.errors" , " telemetry" , true };
31+
32+ /// The number of requests sent to a telemetry endpoint, regardless of success,
33+ /// tagged by the endpoint (`endpoint:agent`, `endpoint:agentless`).
34+ const telemetry::Counter requests{" telemetry_api.requests" , " telemetry" , true };
35+
36+ /// The number of responses received from the endpoint, tagged with the status
37+ /// class (`status_code:2xx`, `status_code:4xx`, ...) and the endpoint
38+ /// (`endpoint:agent`, `endpoint:agentless`).
39+ const telemetry::Counter responses{" telemetry_api.responses" , " telemetry" ,
40+ true };
41+
42+ /// The size in bytes of the payload sent to the telemetry endpoint, tagged by
43+ /// the endpoint (`endpoint:agent`, `endpoint:agentless`).
44+ const telemetry::Distribution bytes_sent{" telemetry_api.bytes" , " telemetry" ,
45+ true };
46+
47+ /// The time in ms it takes to send the payload to the endpoint, tagged by
48+ /// the endpoint (`endpoint:agent`, `endpoint:agentless`).
49+ const telemetry::Distribution request_duration{" telemetry_api.ms" , " telemetry" ,
50+ true };
51+
52+ } // namespace internal_metrics
53+
2054namespace {
2155
2256HTTPClient::URL make_telemetry_endpoint (HTTPClient::URL url) {
@@ -174,26 +208,8 @@ Telemetry::Telemetry(FinalizedConfiguration config,
174208 host_info_(get_host_info()) {
175209 // Callback for successful telemetry HTTP requests, to examine HTTP
176210 // status.
177- telemetry_on_response_ = [logger = logger_](
178- int response_status,
179- const DictReader& /* response_headers*/ ,
180- std::string response_body) {
181- if (response_status < 200 || response_status >= 300 ) {
182- logger->log_error ([&](auto & stream) {
183- stream << " Unexpected telemetry response status " << response_status
184- << " with body (if any, starts on next line):\n "
185- << response_body;
186- });
187- }
188- };
189-
190- // Callback for unsuccessful telemetry HTTP requests.
191- telemetry_on_error_ = [logger = logger_](Error error) {
192- logger->log_error (error.with_prefix (
193- " Error occurred during HTTP request for telemetry: " ));
194- };
195-
196211 send_telemetry (" app-started" , app_started ());
212+ http_client_->drain (clock_ ().tick + 2s);
197213 schedule_tasks ();
198214}
199215
@@ -216,20 +232,23 @@ Telemetry::~Telemetry() {
216232 // The app-closing message is bundled with a message containing the
217233 // final metric values.
218234 send_telemetry (" app-closing" , app_closing ());
219- http_client_->drain (clock_ ().tick + 1s );
235+ http_client_->drain (clock_ ().tick + 2s );
220236 }
221237}
222238
223239Telemetry::Telemetry (Telemetry&& rhs)
224240 : config_(std::move(rhs.config_)),
225241 logger_ (std::move(rhs.logger_)),
226- telemetry_on_response_(std::move(rhs.telemetry_on_response_)),
227- telemetry_on_error_(std::move(rhs.telemetry_on_error_)),
228242 telemetry_endpoint_(std::move(rhs.telemetry_endpoint_)),
229243 tracer_signature_(std::move(rhs.tracer_signature_)),
230244 http_client_(rhs.http_client_),
231245 clock_(std::move(rhs.clock_)),
232246 scheduler_(std::move(rhs.scheduler_)),
247+ counters_(std::move(rhs.counters_)),
248+ counters_snapshot_(std::move(rhs.counters_snapshot_)),
249+ rates_(std::move(rhs.rates_)),
250+ rates_snapshot_(std::move(rhs.rates_snapshot_)),
251+ distributions_(std::move(rhs.distributions_)),
233252 seq_id_(rhs.seq_id_),
234253 config_seq_ids_(rhs.config_seq_ids_),
235254 host_info_(rhs.host_info_) {
@@ -242,13 +261,17 @@ Telemetry& Telemetry::operator=(Telemetry&& rhs) {
242261 cancel_tasks (rhs.tasks_ );
243262 std::swap (config_, rhs.config_ );
244263 std::swap (logger_, rhs.logger_ );
245- std::swap (telemetry_on_response_, rhs.telemetry_on_response_ );
246- std::swap (telemetry_on_error_, rhs.telemetry_on_error_ );
247264 std::swap (telemetry_endpoint_, rhs.telemetry_endpoint_ );
248265 std::swap (http_client_, rhs.http_client_ );
249266 std::swap (tracer_signature_, rhs.tracer_signature_ );
250267 std::swap (http_client_, rhs.http_client_ );
251268 std::swap (clock_, rhs.clock_ );
269+ std::swap (scheduler_, rhs.scheduler_ );
270+ std::swap (counters_, rhs.counters_ );
271+ std::swap (counters_snapshot_, rhs.counters_snapshot_ );
272+ std::swap (rates_, rhs.rates_ );
273+ std::swap (rates_snapshot_, rhs.rates_snapshot_ );
274+ std::swap (distributions_, rhs.distributions_ );
252275 std::swap (seq_id_, rhs.seq_id_ );
253276 std::swap (config_seq_ids_, rhs.config_seq_ids_ );
254277 std::swap (host_info_, rhs.host_info_ );
@@ -259,16 +282,19 @@ Telemetry& Telemetry::operator=(Telemetry&& rhs) {
259282
// Reports `message` as an ERROR-level telemetry log.
// Does nothing (and counts nothing) when `config_.report_logs` is false;
// otherwise bumps the `logs_created` internal counter before forwarding the
// message to `log`.
260283void Telemetry::log_error (std::string message) {
261284 if (!config_.report_logs ) return ;
285+ increment_counter (internal_metrics::logs_created, {" level:error" });
262286 log (std::move (message), LogLevel::ERROR);
263287}
264288
// Reports `message` together with `stacktrace` as an ERROR-level telemetry
// log. Does nothing (and counts nothing) when `config_.report_logs` is false;
// otherwise bumps the `logs_created` internal counter before forwarding to
// `log`.
265289void Telemetry::log_error (std::string message, std::string stacktrace) {
266290 if (!config_.report_logs ) return ;
291+ increment_counter (internal_metrics::logs_created, {" level:error" });
 // NOTE(review): `message` is moved but `stacktrace` is passed as an lvalue,
 // which presumably copies it -- confirm against the signature of `log`.
267292 log (std::move (message), LogLevel::ERROR, stacktrace);
268293}
269294
// Reports `message` as a WARNING-level telemetry log.
// Does nothing (and counts nothing) when `config_.report_logs` is false;
// otherwise bumps the `logs_created` internal counter before forwarding the
// message to `log`.
270295void Telemetry::log_warning (std::string message) {
271296 if (!config_.report_logs ) return ;
 // The `logs_created` metric documents its level tag as one of `debug`,
 // `info`, `warn`, `error`, `critical`, so the tag value is `warn`, not
 // `warning`.
297+ increment_counter (internal_metrics::logs_created, {" level:warn" });
272298 log (std::move (message), LogLevel::WARNING);
273299}
274300
@@ -293,10 +319,55 @@ void Telemetry::send_telemetry(StringView request_type, std::string payload) {
293319 }
294320 };
295321
296- auto post_result = http_client_->post (
297- telemetry_endpoint_, set_telemetry_headers, std::move (payload),
298- telemetry_on_response_, telemetry_on_error_, clock_ ().tick + 5s);
// Callback for telemetry HTTP requests that received a response.
// It first counts the response in the `responses` internal metric, bucketed
// by status class, and then logs an error for any status outside 2xx.
// NOTE(review): the lambda captures `this` and is handed to the HTTP client;
// presumably it must not run after this Telemetry object has been destroyed
// or moved from -- confirm against the HTTP client's callback lifetime.
322+ auto telemetry_on_response = [this , logger = logger_](
323+ int response_status,
324+ const DictReader& /* response_headers*/ ,
325+ std::string response_body) {
 // Buckets are checked from highest to lowest, so anything >= 500 (including
 // values above 599) is tagged 5xx. A status below 100 matches no branch and
 // is not counted.
326+ if (response_status >= 500 ) {
327+ increment_counter (internal_metrics::responses,
328+ {" status_code:5xx" , " endpoint:agent" });
329+ } else if (response_status >= 400 ) {
330+ increment_counter (internal_metrics::responses,
331+ {" status_code:4xx" , " endpoint:agent" });
332+ } else if (response_status >= 300 ) {
333+ increment_counter (internal_metrics::responses,
334+ {" status_code:3xx" , " endpoint:agent" });
335+ } else if (response_status >= 200 ) {
336+ increment_counter (internal_metrics::responses,
337+ {" status_code:2xx" , " endpoint:agent" });
338+ } else if (response_status >= 100 ) {
339+ increment_counter (internal_metrics::responses,
340+ {" status_code:1xx" , " endpoint:agent" });
341+ }
342+
 // Only 2xx is treated as success; every other status is logged together
 // with the response body.
343+ if (response_status < 200 || response_status >= 300 ) {
344+ logger->log_error ([&](auto & stream) {
345+ stream << " Unexpected telemetry response status " << response_status
346+ << " with body (if any, starts on next line):\n "
347+ << response_body;
348+ });
349+ }
350+ };
351+
352+ // Callback for unsuccessful telemetry HTTP requests (no response received).
// Counts the failure in the `errors` internal metric and then logs the error
// with a telemetry-specific prefix. Every error is tagged `type:network`;
// the kind of `error` is not inspected here.
// NOTE(review): captures `this`; presumably it must not run after this
// Telemetry object has been destroyed or moved from -- confirm.
353+ auto telemetry_on_error = [this , logger = logger_](Error error) {
354+ increment_counter (internal_metrics::errors,
355+ {" type:network" , " endpoint:agent" });
356+ logger->log_error (error.with_prefix (
357+ " Error occurred during HTTP request for telemetry: " ));
358+ };
359+
360+ increment_counter (internal_metrics::requests, {" endpoint:agent" });
361+ add_datapoint (internal_metrics::bytes_sent, {" endpoint:agent" },
362+ payload.size ());
363+
364+ auto post_result =
365+ http_client_->post (telemetry_endpoint_, set_telemetry_headers,
366+ std::move (payload), std::move (telemetry_on_response),
367+ std::move (telemetry_on_error), clock_ ().tick + 5s);
299368 if (auto * error = post_result.if_error ()) {
369+ increment_counter (internal_metrics::errors,
370+ {" type:network" , " endpoint:agent" });
300371 logger_->log_error (
301372 error->with_prefix (" Unexpected error submitting telemetry event: " ));
302373 }
0 commit comments