Skip to content

Commit 415193d

Browse files
authored
fix: regression in sampling delegation handling (#133)
This fixes a regression introduced by Sampling Delegation (#59). When the sampling delegation header is present during the trace context extraction, the sampling priority is discarded even if the feature is disabled. This change of behaviour can potentially result in an increase of ingested spans. Additionally, the tracer should behave as before the introduction of this feature when it is not enabled. Changes: - Sampling priority is not longer discard when`x-datadog-delegate-trace-sampling` is present. - Update sampling delegation tests.
1 parent 5f743f4 commit 415193d

File tree

3 files changed

+125
-97
lines changed

3 files changed

+125
-97
lines changed

src/datadog/extraction_util.cpp

Lines changed: 16 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -17,9 +17,6 @@ namespace datadog {
1717
namespace tracing {
1818
namespace {
1919

20-
constexpr StringView sampling_delegation_request_header =
21-
"x-datadog-delegate-trace-sampling";
22-
2320
// Decode the specified `trace_tags` and integrate them into the specified
2421
// `result`. If an error occurs, add a `tags::internal::propagation_error` tag
2522
// to the specified `span_tags` and log a diagnostic using the specified
@@ -129,29 +126,24 @@ Expected<ExtractedData> extract_datadog(
129126
}
130127
result.parent_id = *parent_id;
131128

129+
if (auto found = headers.lookup("x-datadog-sampling-priority")) {
130+
auto sampling_priority = parse_int(*found, 10);
131+
if (auto* error = sampling_priority.if_error()) {
132+
std::string prefix;
133+
prefix +=
134+
"Could not extract Datadog-style sampling priority from "
135+
"x-datadog-sampling-priority: ";
136+
append(prefix, *found);
137+
prefix += ' ';
138+
return error->with_prefix(prefix);
139+
}
140+
141+
result.sampling_priority = *sampling_priority;
142+
}
143+
132144
if (auto sampling_delegation_header =
133-
headers.lookup(sampling_delegation_request_header)) {
145+
headers.lookup("x-datadog-delegate-trace-sampling")) {
134146
result.delegate_sampling_decision = true;
135-
// If the trace sampling decision is being delegated to us, then we don't
136-
// interpret the sampling priority (if any) included in the request.
137-
} else {
138-
result.delegate_sampling_decision = false;
139-
140-
const StringView sampling_priority_header = "x-datadog-sampling-priority";
141-
if (auto found = headers.lookup(sampling_priority_header)) {
142-
auto sampling_priority = parse_int(*found, 10);
143-
if (auto* error = sampling_priority.if_error()) {
144-
std::string prefix;
145-
prefix += "Could not extract Datadog-style sampling priority from ";
146-
append(prefix, sampling_priority_header);
147-
prefix += ": ";
148-
append(prefix, *found);
149-
prefix += ' ';
150-
return error->with_prefix(prefix);
151-
}
152-
153-
result.sampling_priority = *sampling_priority;
154-
}
155147
}
156148

157149
auto origin = headers.lookup("x-datadog-origin");

src/datadog/tracer.cpp

Lines changed: 45 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -176,10 +176,7 @@ Expected<Span> Tracer::extract_span(const DictReader& reader,
176176
extracted_contexts.back().headers_examined = audited_reader.entries_found;
177177
}
178178

179-
auto [trace_id, parent_id, origin, trace_tags, delegate_sampling_decision,
180-
sampling_priority, datadog_w3c_parent_id, additional_w3c_tracestate,
181-
additional_datadog_w3c_tracestate, style, headers_examined] =
182-
merge(extracted_contexts);
179+
auto merged_context = merge(extracted_contexts);
183180

184181
// Some information might be missing.
185182
// Here are the combinations considered:
@@ -196,56 +193,60 @@ Expected<Span> Tracer::extract_span(const DictReader& reader,
196193
// - trace ID and parent ID means we're extracting a child span
197194
// - if trace ID is zero, then that's an error.
198195

199-
if (!trace_id && !parent_id) {
196+
if (!merged_context.trace_id && !merged_context.parent_id) {
200197
return Error{Error::NO_SPAN_TO_EXTRACT,
201198
"There's neither a trace ID nor a parent span ID to extract."}
202-
.with_prefix(extraction_error_prefix(style, headers_examined));
199+
.with_prefix(extraction_error_prefix(merged_context.style,
200+
merged_context.headers_examined));
203201
}
204-
if (!trace_id) {
202+
if (!merged_context.trace_id) {
205203
std::string message;
206204
message +=
207205
"There's no trace ID to extract, but there is a parent span ID: ";
208-
message += std::to_string(*parent_id);
206+
message += std::to_string(*merged_context.parent_id);
209207
return Error{Error::MISSING_TRACE_ID, std::move(message)}.with_prefix(
210-
extraction_error_prefix(style, headers_examined));
208+
extraction_error_prefix(merged_context.style,
209+
merged_context.headers_examined));
211210
}
212-
if (!parent_id && !origin) {
211+
if (!merged_context.parent_id && !merged_context.origin) {
213212
std::string message;
214213
message +=
215214
"There's no parent span ID to extract, but there is a trace ID: ";
216215
message += "[hexadecimal = ";
217-
message += trace_id->hex_padded();
218-
if (trace_id->high == 0) {
216+
message += merged_context.trace_id->hex_padded();
217+
if (merged_context.trace_id->high == 0) {
219218
message += ", decimal = ";
220-
message += std::to_string(trace_id->low);
219+
message += std::to_string(merged_context.trace_id->low);
221220
}
222221
message += ']';
223222
return Error{Error::MISSING_PARENT_SPAN_ID, std::move(message)}.with_prefix(
224-
extraction_error_prefix(style, headers_examined));
223+
extraction_error_prefix(merged_context.style,
224+
merged_context.headers_examined));
225225
}
226226

227-
if (!parent_id) {
227+
if (!merged_context.parent_id) {
228228
// We have a trace ID, but not parent ID. We're meant to be the root, and
229229
// whoever called us already created a trace ID for us (to correlate with
230230
// whatever they're doing).
231-
parent_id = 0;
231+
merged_context.parent_id = 0;
232232
}
233233

234-
assert(parent_id);
235-
assert(trace_id);
234+
assert(merged_context.parent_id);
235+
assert(merged_context.trace_id);
236236

237-
if (*trace_id == 0) {
237+
if (*merged_context.trace_id == 0) {
238238
return Error{Error::ZERO_TRACE_ID,
239239
"extracted zero value for trace ID, which is invalid"}
240-
.with_prefix(extraction_error_prefix(style, headers_examined));
240+
.with_prefix(extraction_error_prefix(merged_context.style,
241+
merged_context.headers_examined));
241242
}
242243

243244
// We're done extracting fields. Now create the span.
244245
// This is similar to what we do in `create_span`.
245246
span_data->apply_config(*config_manager_->span_defaults(), config, clock_);
246247
span_data->span_id = generator_->span_id();
247-
span_data->trace_id = *trace_id;
248-
span_data->parent_id = *parent_id;
248+
span_data->trace_id = *merged_context.trace_id;
249+
span_data->parent_id = *merged_context.parent_id;
249250

250251
if (span_data->trace_id.high) {
251252
// The trace ID has some bits set in the higher 64 bits. Set the
@@ -255,12 +256,14 @@ Expected<Span> Tracer::extract_span(const DictReader& reader,
255256
// First, though, if the `trace_id_high` tag is already set and has a
256257
// bogus value or a value inconsistent with the trace ID, tag an error.
257258
const auto hex_high = hex_padded(span_data->trace_id.high);
258-
const auto extant = std::find_if(
259-
trace_tags.begin(), trace_tags.end(), [&](const auto& pair) {
260-
return pair.first == tags::internal::trace_id_high;
261-
});
262-
if (extant == trace_tags.end()) {
263-
trace_tags.emplace_back(tags::internal::trace_id_high, hex_high);
259+
const auto extant =
260+
std::find_if(merged_context.trace_tags.begin(),
261+
merged_context.trace_tags.end(), [&](const auto& pair) {
262+
return pair.first == tags::internal::trace_id_high;
263+
});
264+
if (extant == merged_context.trace_tags.end()) {
265+
merged_context.trace_tags.emplace_back(tags::internal::trace_id_high,
266+
hex_high);
264267
} else {
265268
// There is already a `trace_id_high` tag. `hex_high` is its proper
266269
// value. Check if the extant value is malformed or different from
@@ -279,14 +282,18 @@ Expected<Span> Tracer::extract_span(const DictReader& reader,
279282
}
280283
}
281284

282-
if (datadog_w3c_parent_id) {
283-
span_data->tags[tags::internal::w3c_parent_id] = *datadog_w3c_parent_id;
285+
if (merged_context.datadog_w3c_parent_id) {
286+
span_data->tags[tags::internal::w3c_parent_id] =
287+
*merged_context.datadog_w3c_parent_id;
284288
}
285289

290+
const bool delegate_sampling_decision =
291+
sampling_delegation_enabled_ && merged_context.delegate_sampling_decision;
292+
286293
Optional<SamplingDecision> sampling_decision;
287-
if (sampling_priority) {
294+
if (!delegate_sampling_decision && merged_context.sampling_priority) {
288295
SamplingDecision decision;
289-
decision.priority = *sampling_priority;
296+
decision.priority = *merged_context.sampling_priority;
290297
// `decision.mechanism` is null. We might be able to infer it once we
291298
// extract `trace_tags`, but we would have no use for it, so we won't.
292299
decision.origin = SamplingDecision::Origin::EXTRACTED;
@@ -300,10 +307,12 @@ Expected<Span> Tracer::extract_span(const DictReader& reader,
300307
logger_, collector_, tracer_telemetry_, config_manager_->trace_sampler(),
301308
span_sampler_, config_manager_->span_defaults(), config_manager_,
302309
runtime_id_, sampling_delegation_enabled_, delegate_sampling_decision,
303-
injection_styles_, hostname_, std::move(origin), tags_header_max_size_,
304-
std::move(trace_tags), std::move(sampling_decision),
305-
std::move(additional_w3c_tracestate),
306-
std::move(additional_datadog_w3c_tracestate), std::move(span_data));
310+
injection_styles_, hostname_, std::move(merged_context.origin),
311+
tags_header_max_size_, std::move(merged_context.trace_tags),
312+
std::move(sampling_decision),
313+
std::move(merged_context.additional_w3c_tracestate),
314+
std::move(merged_context.additional_datadog_w3c_tracestate),
315+
std::move(span_data));
307316
Span span{span_data_ptr, segment,
308317
[generator = generator_]() { return generator->span_id(); },
309318
clock_};

test/test_tracer.cpp

Lines changed: 64 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -445,16 +445,6 @@ TEST_CASE("span extraction") {
445445
TraceID(123),
446446
456,
447447
2},
448-
{__LINE__,
449-
"datadog style with delegate header",
450-
{PropagationStyle::DATADOG},
451-
{{"x-datadog-trace-id", "123"},
452-
{"x-datadog-parent-id", "456"},
453-
{"x-datadog-delegate-trace-sampling", "delegate"},
454-
{"x-datadog-sampling-priority", "3"}},
455-
TraceID(123),
456-
456,
457-
nullopt},
458448
{__LINE__,
459449
"datadog style without sampling priority",
460450
{PropagationStyle::DATADOG},
@@ -1204,7 +1194,12 @@ TEST_CASE("span extraction") {
12041194
MockDictReader reader{headers};
12051195
auto span = tracer.extract_span(reader);
12061196
REQUIRE(span);
1207-
REQUIRE(!span->trace_segment().sampling_decision());
1197+
1198+
if (*config.delegate_trace_sampling) {
1199+
REQUIRE(!span->trace_segment().sampling_decision());
1200+
} else {
1201+
REQUIRE(span->trace_segment().sampling_decision());
1202+
}
12081203

12091204
MockDictWriter writer;
12101205
span->inject(writer);
@@ -1409,6 +1404,49 @@ TEST_CASE(
14091404
test_case.expected_error_prefix + test_case.tid_tag_value);
14101405
}
14111406

1407+
TEST_CASE("sampling delegation extraction") {
1408+
const bool enable_sampling_delegation = GENERATE(true, false);
1409+
1410+
CAPTURE(enable_sampling_delegation);
1411+
1412+
const auto logger = std::make_shared<NullLogger>();
1413+
const auto collector = std::make_shared<NullCollector>();
1414+
1415+
TracerConfig config;
1416+
config.service = "test-sampling-delegation";
1417+
config.logger = logger;
1418+
config.collector = collector;
1419+
config.extraction_styles = {PropagationStyle::DATADOG};
1420+
config.trace_sampler.sample_rate = 1.;
1421+
config.delegate_trace_sampling = enable_sampling_delegation;
1422+
1423+
auto validated_config = finalize_config(config);
1424+
REQUIRE(validated_config);
1425+
1426+
Tracer tracer(*validated_config);
1427+
1428+
const std::unordered_map<std::string, std::string> headers{
1429+
{"x-datadog-trace-id", "17491188783264004180"},
1430+
{"x-datadog-parent-id", "3390700340160032468"},
1431+
{"x-datadog-sampling-priority", "-1"},
1432+
{"x-datadog-tags", "_dd.p.tid=66718e8c00000000"},
1433+
{"x-datadog-delegate-trace-sampling", "delegate"},
1434+
};
1435+
1436+
MockDictReader propagation_reader{headers};
1437+
const auto maybe_span = tracer.extract_span(propagation_reader);
1438+
REQUIRE(maybe_span);
1439+
1440+
auto sampling_decision = maybe_span->trace_segment().sampling_decision();
1441+
if (enable_sampling_delegation) {
1442+
CHECK(!sampling_decision.has_value());
1443+
} else {
1444+
REQUIRE(sampling_decision.has_value());
1445+
CHECK(sampling_decision->origin == SamplingDecision::Origin::EXTRACTED);
1446+
CHECK(sampling_decision->priority == int(SamplingPriority::USER_DROP));
1447+
}
1448+
}
1449+
14121450
TEST_CASE("_dd.is_sampling_decider") {
14131451
// This test involves three tracers: "service1", "service2", and "service3".
14141452
// Each calls the next, and each produces two spans: "local_root" and "child".
@@ -1451,6 +1489,7 @@ TEST_CASE("_dd.is_sampling_decider") {
14511489
config2.collector = collector;
14521490
config2.logger = logger;
14531491
config2.service = "service2";
1492+
config2.trace_sampler.sample_rate = 1; // keep all traces
14541493
config2.delegate_trace_sampling = true;
14551494

14561495
TracerConfig config3;
@@ -1580,7 +1619,9 @@ TEST_CASE("_dd.is_sampling_decider") {
15801619
}
15811620
REQUIRE(span.service != "service1");
15821621
if (span.service == "service2" && span.name == "local_root") {
1583-
REQUIRE(span.tags.count(tags::internal::sampling_decider) == 0);
1622+
const bool made_the_decision = service3_delegation_enabled ? 0 : 1;
1623+
REQUIRE(span.tags.count(tags::internal::sampling_decider) ==
1624+
made_the_decision);
15841625
REQUIRE(span.numeric_tags.count(tags::internal::sampling_priority) ==
15851626
1);
15861627
REQUIRE(span.numeric_tags.at(tags::internal::sampling_priority) ==
@@ -1595,8 +1636,9 @@ TEST_CASE("_dd.is_sampling_decider") {
15951636
}
15961637
REQUIRE(span.service != "service2");
15971638
if (span.service == "service3" && span.name == "local_root") {
1598-
REQUIRE(span.tags.count(tags::internal::sampling_decider) == 1);
1599-
REQUIRE(span.tags.at(tags::internal::sampling_decider) == "1");
1639+
const bool made_the_decision = service3_delegation_enabled ? 1 : 0;
1640+
REQUIRE(span.tags.count(tags::internal::sampling_decider) ==
1641+
made_the_decision);
16001642
REQUIRE(span.numeric_tags.count(tags::internal::sampling_priority) ==
16011643
1);
16021644
REQUIRE(span.numeric_tags.at(tags::internal::sampling_priority) ==
@@ -1649,27 +1691,13 @@ TEST_CASE("sampling delegation is not an override") {
16491691
//
16501692
// The idea is that service2 does not perform delegation when service1 already
16511693
// made a decision and did not request delegation.
1652-
auto service1_sampling_priority =
1653-
GENERATE(values<Optional<int>>({nullopt, -1, 0, 1, 2}));
16541694
auto service1_delegate = GENERATE(true, false);
16551695
auto service3_sample_rate = GENERATE(0.0, 1.0);
16561696

1657-
int expected_sampling_priority;
1658-
if (service1_sampling_priority.has_value() && !service1_delegate) {
1659-
// Service1 made a decision and didn't ask for delegation, so that's the
1660-
// decision.
1661-
expected_sampling_priority = *service1_sampling_priority;
1662-
} else {
1663-
// Service1 either didn't make a decision or did request delegation, so the
1664-
// decision will be delegated through service2 to service3, whose decision
1665-
// depends on its configured sample rate.
1666-
expected_sampling_priority = service3_sample_rate == 0.0 ? -1 : 2;
1667-
}
1697+
const int service3_sampling_priority = service3_sample_rate == 0.0 ? -1 : 2;
16681698

1669-
CAPTURE(service1_sampling_priority);
16701699
CAPTURE(service1_delegate);
16711700
CAPTURE(service3_sample_rate);
1672-
CAPTURE(expected_sampling_priority);
16731701

16741702
const auto collector = std::make_shared<MockCollector>();
16751703
const auto logger = std::make_shared<MockLogger>();
@@ -1697,6 +1725,7 @@ TEST_CASE("sampling delegation is not an override") {
16971725
config3.logger = logger;
16981726
config3.extraction_styles = config1.injection_styles = styles;
16991727
config3.service = "service3";
1728+
config3.delegate_trace_sampling = true;
17001729
config3.trace_sampler.sample_rate = service3_sample_rate;
17011730

17021731
auto valid_config = finalize_config(config1);
@@ -1725,18 +1754,16 @@ TEST_CASE("sampling delegation is not an override") {
17251754
span_config.name = "local_root";
17261755
Span span1 = tracer1.create_span(span_config);
17271756
span1.inject(propagation_writer);
1728-
if (service1_sampling_priority.has_value()) {
1729-
propagation_writer.items["x-datadog-sampling-priority"] =
1730-
std::to_string(*service1_sampling_priority);
1731-
} else {
1732-
propagation_writer.items.erase("x-datadog-sampling-priority");
1733-
}
17341757

17351758
{
17361759
auto span2 = tracer2.extract_span(propagation_reader, span_config);
17371760
REQUIRE(span2);
17381761
propagation_writer.items.clear();
17391762
span2->inject(propagation_writer);
1763+
const bool expected_delegate_header = service1_delegate ? 1 : 0;
1764+
CHECK(
1765+
propagation_writer.items.count("x-datadog-delegate-trace-sampling") ==
1766+
expected_delegate_header);
17401767

17411768
{
17421769
auto span3 = tracer3.extract_span(propagation_reader, span_config);
@@ -1767,11 +1794,11 @@ TEST_CASE("sampling delegation is not an override") {
17671794
// If `service1_delegate` is false, then service1's sampling decision was
17681795
// made by the service1's sampler, which will result in priority 2.
17691796
// Otherwise, it's the same priority expected for the other spans.
1770-
if (span.service == "service1" && !service1_delegate) {
1797+
if (!service1_delegate) {
17711798
REQUIRE(span.numeric_tags.at(tags::internal::sampling_priority) == 2);
17721799
} else {
17731800
REQUIRE(span.numeric_tags.at(tags::internal::sampling_priority) ==
1774-
expected_sampling_priority);
1801+
service3_sampling_priority);
17751802
}
17761803
}
17771804
}

0 commit comments

Comments
 (0)