Skip to content

Commit 98ee6e8

Browse files
committed
Implement endpoint guessing
1 parent 8021373 commit 98ee6e8

18 files changed

+539
-45
lines changed

BUILD.bazel

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@ cc_library(
1717
"src/datadog/datadog_agent_config.cpp",
1818
"src/datadog/datadog_agent.cpp",
1919
"src/datadog/default_http_client_null.cpp",
20+
"src/datadog/endpoint_guessing.cpp",
2021
"src/datadog/environment.cpp",
2122
"src/datadog/error.cpp",
2223
"src/datadog/extraction_util.cpp",
@@ -60,6 +61,7 @@ cc_library(
6061
"src/datadog/datadog_agent.h",
6162
"src/datadog/default_http_client.h",
6263
"src/datadog/extracted_data.h",
64+
"src/datadog/endpoint_guessing.h",
6365
"src/datadog/extraction_util.h",
6466
"src/datadog/glob.h",
6567
"src/datadog/hex.h",

CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -173,6 +173,7 @@ target_sources(dd-trace-cpp-objects
173173
src/datadog/collector_response.cpp
174174
src/datadog/datadog_agent_config.cpp
175175
src/datadog/datadog_agent.cpp
176+
src/datadog/endpoint_guessing.cpp
176177
src/datadog/environment.cpp
177178
src/datadog/error.cpp
178179
src/datadog/extraction_util.cpp

include/datadog/config.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,8 @@ enum class ConfigName : char {
2828
TRACE_BAGGAGE_MAX_BYTES,
2929
TRACE_BAGGAGE_MAX_ITEMS,
3030
APM_TRACING_ENABLED,
31+
TRACE_RESOURCE_RENAMING_ENABLED,
32+
TRACE_RESOURCE_RENAMING_ALWAYS_SIMPLIFIED_ENDPOINT,
3133
};
3234

3335
// Represents metadata for configuration parameters

include/datadog/environment.h

Lines changed: 40 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -23,44 +23,46 @@ namespace environment {
2323

2424
// To enforce correspondence between `enum Variable` and `variable_names`, the
2525
// preprocessor is used so that the DD_* symbols are listed exactly once.
26-
#define LIST_ENVIRONMENT_VARIABLES(MACRO) \
27-
MACRO(DD_AGENT_HOST) \
28-
MACRO(DD_ENV) \
29-
MACRO(DD_INSTRUMENTATION_TELEMETRY_ENABLED) \
30-
MACRO(DD_PROPAGATION_STYLE_EXTRACT) \
31-
MACRO(DD_PROPAGATION_STYLE_INJECT) \
32-
MACRO(DD_REMOTE_CONFIGURATION_ENABLED) \
33-
MACRO(DD_REMOTE_CONFIG_POLL_INTERVAL_SECONDS) \
34-
MACRO(DD_SERVICE) \
35-
MACRO(DD_SPAN_SAMPLING_RULES) \
36-
MACRO(DD_SPAN_SAMPLING_RULES_FILE) \
37-
MACRO(DD_TRACE_PROPAGATION_STYLE_EXTRACT) \
38-
MACRO(DD_TRACE_PROPAGATION_STYLE_INJECT) \
39-
MACRO(DD_TRACE_PROPAGATION_STYLE) \
40-
MACRO(DD_TAGS) \
41-
MACRO(DD_TRACE_AGENT_PORT) \
42-
MACRO(DD_TRACE_AGENT_URL) \
43-
MACRO(DD_TRACE_DEBUG) \
44-
MACRO(DD_TRACE_ENABLED) \
45-
MACRO(DD_TRACE_RATE_LIMIT) \
46-
MACRO(DD_TRACE_REPORT_HOSTNAME) \
47-
MACRO(DD_TRACE_SAMPLE_RATE) \
48-
MACRO(DD_TRACE_SAMPLING_RULES) \
49-
MACRO(DD_TRACE_STARTUP_LOGS) \
50-
MACRO(DD_TRACE_TAGS_PROPAGATION_MAX_LENGTH) \
51-
MACRO(DD_VERSION) \
52-
MACRO(DD_TRACE_128_BIT_TRACEID_GENERATION_ENABLED) \
53-
MACRO(DD_TELEMETRY_HEARTBEAT_INTERVAL) \
54-
MACRO(DD_TELEMETRY_METRICS_ENABLED) \
55-
MACRO(DD_TELEMETRY_METRICS_INTERVAL_SECONDS) \
56-
MACRO(DD_TELEMETRY_DEBUG) \
57-
MACRO(DD_TRACE_BAGGAGE_MAX_ITEMS) \
58-
MACRO(DD_TRACE_BAGGAGE_MAX_BYTES) \
59-
MACRO(DD_TELEMETRY_LOG_COLLECTION_ENABLED) \
60-
MACRO(DD_INSTRUMENTATION_INSTALL_ID) \
61-
MACRO(DD_INSTRUMENTATION_INSTALL_TYPE) \
62-
MACRO(DD_INSTRUMENTATION_INSTALL_TIME) \
63-
MACRO(DD_APM_TRACING_ENABLED) \
26+
#define LIST_ENVIRONMENT_VARIABLES(MACRO) \
27+
MACRO(DD_AGENT_HOST) \
28+
MACRO(DD_ENV) \
29+
MACRO(DD_INSTRUMENTATION_TELEMETRY_ENABLED) \
30+
MACRO(DD_PROPAGATION_STYLE_EXTRACT) \
31+
MACRO(DD_PROPAGATION_STYLE_INJECT) \
32+
MACRO(DD_REMOTE_CONFIGURATION_ENABLED) \
33+
MACRO(DD_REMOTE_CONFIG_POLL_INTERVAL_SECONDS) \
34+
MACRO(DD_SERVICE) \
35+
MACRO(DD_SPAN_SAMPLING_RULES) \
36+
MACRO(DD_SPAN_SAMPLING_RULES_FILE) \
37+
MACRO(DD_TRACE_PROPAGATION_STYLE_EXTRACT) \
38+
MACRO(DD_TRACE_PROPAGATION_STYLE_INJECT) \
39+
MACRO(DD_TRACE_PROPAGATION_STYLE) \
40+
MACRO(DD_TAGS) \
41+
MACRO(DD_TRACE_AGENT_PORT) \
42+
MACRO(DD_TRACE_AGENT_URL) \
43+
MACRO(DD_TRACE_DEBUG) \
44+
MACRO(DD_TRACE_ENABLED) \
45+
MACRO(DD_TRACE_RATE_LIMIT) \
46+
MACRO(DD_TRACE_REPORT_HOSTNAME) \
47+
MACRO(DD_TRACE_SAMPLE_RATE) \
48+
MACRO(DD_TRACE_SAMPLING_RULES) \
49+
MACRO(DD_TRACE_STARTUP_LOGS) \
50+
MACRO(DD_TRACE_TAGS_PROPAGATION_MAX_LENGTH) \
51+
MACRO(DD_VERSION) \
52+
MACRO(DD_TRACE_128_BIT_TRACEID_GENERATION_ENABLED) \
53+
MACRO(DD_TELEMETRY_HEARTBEAT_INTERVAL) \
54+
MACRO(DD_TELEMETRY_METRICS_ENABLED) \
55+
MACRO(DD_TELEMETRY_METRICS_INTERVAL_SECONDS) \
56+
MACRO(DD_TELEMETRY_DEBUG) \
57+
MACRO(DD_TRACE_BAGGAGE_MAX_ITEMS) \
58+
MACRO(DD_TRACE_BAGGAGE_MAX_BYTES) \
59+
MACRO(DD_TELEMETRY_LOG_COLLECTION_ENABLED) \
60+
MACRO(DD_INSTRUMENTATION_INSTALL_ID) \
61+
MACRO(DD_INSTRUMENTATION_INSTALL_TYPE) \
62+
MACRO(DD_INSTRUMENTATION_INSTALL_TIME) \
63+
MACRO(DD_APM_TRACING_ENABLED) \
64+
MACRO(DD_TRACE_RESOURCE_RENAMING_ENABLED) \
65+
MACRO(DD_TRACE_RESOURCE_RENAMING_ALWAYS_SIMPLIFIED_ENDPOINT) \
6466
MACRO(DD_EXTERNAL_ENV)
6567

6668
#define WITH_COMMA(ARG) ARG,

include/datadog/trace_segment.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,7 @@
3737
#include "runtime_id.h"
3838
#include "sampling_decision.h"
3939
#include "sampling_priority.h"
40+
#include "tracer_config.h"
4041

4142
namespace datadog {
4243
namespace telemetry {
@@ -79,6 +80,8 @@ class TraceSegment {
7980

8081
std::shared_ptr<ConfigManager> config_manager_;
8182

83+
ResourceRenamingMode resource_renaming_mode_;
84+
8285
bool tracing_enabled_;
8386

8487
public:
@@ -97,6 +100,7 @@ class TraceSegment {
97100
Optional<std::string> additional_w3c_tracestate,
98101
Optional<std::string> additional_datadog_w3c_tracestate,
99102
std::unique_ptr<SpanData> local_root,
103+
ResourceRenamingMode resource_renaming_mode,
100104
bool tracing_enabled = true);
101105

102106
const SpanDefaults& defaults() const;

include/datadog/tracer.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,7 @@ class Tracer {
5555
bool baggage_injection_enabled_;
5656
bool baggage_extraction_enabled_;
5757
bool tracing_enabled_;
58+
ResourceRenamingMode resource_renaming_mode_;
5859

5960
public:
6061
// Create a tracer configured using the specified `config`, and optionally:

include/datadog/tracer_config.h

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -178,6 +178,21 @@ struct TracerConfig {
178178
/// Overridden by the `DD_APM_TRACING_ENABLED` environment variable. Defaults
179179
/// to `true`.
180180
Optional<bool> tracing_enabled;
181+
182+
// Whether generation of http.endpoint is enabled. This is disabled by
183+
// default.
184+
Optional<bool> resource_renaming_enabled;
185+
186+
// Whether http.endpoint is always calculated, even when http.route is
187+
// present. This is disabled by default.
188+
// This option is ignored if `resource_renaming_enabled` is not `true`.
189+
Optional<bool> resource_renaming_always_simplified_endpoint;
190+
};
191+
192+
// Mode governing computation of the `http.endpoint` tag.
// NOTE(review): presumably derived from
// `TracerConfig::resource_renaming_enabled` and
// `TracerConfig::resource_renaming_always_simplified_endpoint`; the code that
// performs that mapping is not visible here -- confirm.
enum class ResourceRenamingMode : std::uint8_t {
193+
// NOTE(review): presumably no http.endpoint is generated -- confirm.
DISABLED,
194+
FALLBACK, // only if http.route is not present
195+
// NOTE(review): presumably http.endpoint is computed even when http.route is
// present -- confirm.
ALWAYS_CALCULATE,
181196
};
182197

183198
// `FinalizedTracerConfig` contains `Tracer` implementation details derived from
@@ -218,6 +233,7 @@ class FinalizedTracerConfig final {
218233
std::shared_ptr<EventScheduler> event_scheduler;
219234
std::shared_ptr<HTTPClient> http_client;
220235
bool tracing_enabled;
236+
ResourceRenamingMode resource_renaming_mode;
221237
};
222238

223239
// Return a `FinalizedTracerConfig` from the specified `config` and from any

src/datadog/endpoint_guessing.cpp

Lines changed: 179 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,179 @@
1+
#include "endpoint_guessing.h"
2+
3+
#include <cstdint>
4+
5+
namespace datadog::tracing {

namespace {

// Upper bound on the number of non-empty path components kept in a guessed
// endpoint; everything after the eighth component is dropped.
constexpr std::size_t MAX_COMPONENTS = 8;

// Character classes used by the component patterns below. They are
// deliberately ASCII-only and locale-independent.
constexpr bool is_digit(char c) noexcept { return c >= '0' && c <= '9'; }
constexpr bool is_hex_alpha(char c) noexcept {
  return (c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F');
}
constexpr bool is_delim(char c) noexcept {
  return c == '.' || c == '_' || c == '-';
}
constexpr bool is_str_special(char c) noexcept {
  return c == '%' || c == '&' || c == '\'' || c == '(' || c == ')' ||
         c == '*' || c == '+' || c == ',' || c == ':' || c == '=' || c == '@';
}

/*
clang-format off
{param:int}     [1-9][0-9]+                   len≥2, digits only, first 1–9
{param:int_id}  (?=.*[0-9])[0-9._-]{3,}       len≥3, [0-9._-], must contain digit
{param:hex}     (?=.*[0-9])[A-Fa-f0-9]{6,}    len≥6, hex digits, must contain decimal digit
{param:hex_id}  (?=.*[0-9])[A-Fa-f0-9._-]{6,} len≥6, hex+._-, must contain decimal digit
{param:str}     .{20,}|.*[%&'()*+,:=@].*      any chars, valid if len≥20 or contains special
clang-format on
*/
// One bit per placeholder kind. The bits are ordered by priority: when a
// component matches several patterns, the lowest set bit wins.
enum component_type : std::uint8_t {
  none = 0,
  is_int = 1 << 0,
  is_int_id = 1 << 1,
  is_hex = 1 << 2,
  is_hex_id = 1 << 3,
  is_str = 1 << 4,
};

// Return the placeholder text for `type`, or an empty view for `none` (or any
// value that is not exactly one of the enumerators).
std::string_view to_string(component_type type) noexcept {
  switch (type) {
    case component_type::is_int:
      return "{param:int}";
    case component_type::is_int_id:
      return "{param:int_id}";
    case component_type::is_hex:
      return "{param:hex}";
    case component_type::is_hex_id:
      return "{param:hex_id}";
    case component_type::is_str:
      return "{param:str}";
    default:
      return "";
  }
}

// Expand a boolean into an all-zeros / all-ones byte so that it can be
// combined with a set of `component_type` bits without branching.
inline std::uint8_t bool2mask(bool x) noexcept {
  return static_cast<std::uint8_t>(x ? 0xFF : 0x00);
}

// Classify a single path component (no '/' inside) and return the
// highest-priority pattern it matches, or `component_type::none` if the
// component should be kept verbatim.
component_type component_replacement(std::string_view path) noexcept {
  // `viable` is the set of component types not yet ruled out.
  std::uint8_t viable = is_int | is_int_id | is_hex | is_hex_id | is_str;

  const auto exclude = [&viable](int types) noexcept {
    viable = static_cast<std::uint8_t>(viable & ~types);
  };

  // Minimum lengths: int >= 2, int_id >= 3, hex and hex_id >= 6.
  if (path.size() < 2) {
    exclude(is_int | is_int_id | is_hex | is_hex_id);
  } else if (path.size() < 3) {
    exclude(is_int_id | is_hex | is_hex_id);
  } else if (path.size() < 6) {
    exclude(is_hex | is_hex_id);
  }

  // is_int does not allow a leading 0
  if (!path.empty() && path.front() == '0') {
    exclude(is_int);
  }

  bool found_special_char = false;
  bool found_digit = false;

  for (const char c : path) {
    found_special_char = found_special_char || is_str_special(c);
    found_digit = found_digit || is_digit(c);

    // Types whose alphabet contains `c`. Any character is acceptable for
    // is_str; its extra conditions are checked after the loop.
    const int allowed =
        is_str |
        (bool2mask(is_digit(c)) & (is_int | is_int_id | is_hex | is_hex_id)) |
        (bool2mask(is_hex_alpha(c)) & (is_hex | is_hex_id)) |
        (bool2mask(is_delim(c)) & (is_int_id | is_hex_id));

    viable = static_cast<std::uint8_t>(viable & allowed);
  }

  // is_str requires a special char or a size >= 20
  if (!found_special_char && path.size() < 20) {
    exclude(is_str);
  }

  // int_id, hex and hex_id all require at least one decimal digit. Without
  // this check for int_id, delimiter-only components such as "---" or "..."
  // would be replaced by "{param:int_id}". (is_int accepts digits only, so it
  // needs no separate check.)
  if (!found_digit) {
    exclude(is_int_id | is_hex | is_hex_id);
  }

  if (viable == 0) {
    return component_type::none;
  }

  // Isolate the lowest set bit, i.e. the highest-priority match.
  // c++20: use std::countr_zero
  const int bits = viable;
  return static_cast<component_type>(bits & -bits);
}
}  // namespace

// Derive a low-cardinality endpoint from a request path.
//
// The query string (everything from the first '?') is discarded. If the
// remaining path is empty or does not start with '/', "/" is returned. Empty
// components (repeated or trailing slashes) are skipped, each remaining
// component that matches one of the patterns above is replaced by its
// "{param:...}" placeholder, and at most MAX_COMPONENTS components are kept.
// A path without any non-empty component yields "/".
std::string guess_endpoint(std::string_view orig_path) {
  auto path = orig_path;

  // remove the query string if any
  const auto query_pos = path.find('?');
  if (query_pos != std::string_view::npos) {
    path = path.substr(0, query_pos);
  }

  if (path.empty() || path.front() != '/') {
    return "/";
  }

  std::string result{};
  std::size_t component_count = 0;

  path.remove_prefix(1);
  while (!path.empty()) {
    const auto slash_pos = path.find('/');

    // substr clamps the count, so npos means "up to the end of the path"
    const std::string_view component = path.substr(0, slash_pos);

    // remove current component from the path
    if (slash_pos == std::string_view::npos) {
      path = std::string_view{};
    } else {
      path.remove_prefix(slash_pos + 1);
    }

    if (component.empty()) {
      continue;
    }

    result.append("/");

    // replace the literal component with the appropriate placeholder
    // (if it matches one of the patterns)
    const auto type = component_replacement(component);
    if (type == component_type::none) {
      result.append(component);
    } else {
      result.append(to_string(type));
    }

    if (++component_count >= MAX_COMPONENTS) {
      break;
    }
  }

  if (result.empty()) {
    return "/";
  }

  return result;
}

}  // namespace datadog::tracing

src/datadog/endpoint_guessing.h

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
#pragma once
2+
3+
#include <string>
4+
#include <string_view>
5+
6+
namespace datadog::tracing {
7+
8+
// Return a simplified endpoint derived from the request `path`.
//
// The query string (everything from the first '?') is ignored. If the
// remaining path is empty or does not begin with '/', the result is "/".
// Empty path components are skipped, components matching one of the
// recognised patterns are replaced by a "{param:...}" placeholder
// (`{param:int}`, `{param:int_id}`, `{param:hex}`, `{param:hex_id}` or
// `{param:str}`), and at most eight components are kept.
std::string guess_endpoint(std::string_view path);
9+
10+
}

src/datadog/tags.cpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,9 @@ const std::string process_id = "process_id";
3030
const std::string language = "language";
3131
const std::string runtime_id = "runtime-id";
3232
const std::string w3c_parent_id = "_dd.parent_id";
33+
const std::string http_endpoint = "http.endpoint";
34+
const std::string http_route = "http.route";
35+
const std::string http_url = "http.url";
3336
const std::string trace_source = "_dd.p.ts";
3437
const std::string apm_enabled = "_dd.apm.enabled";
3538
const std::string ksr = "_dd.p.ksr";

0 commit comments

Comments
 (0)