Skip to content

Commit 4080fa5

Browse files
authored
heterogeneous trace context extraction (#72)
* more context in extraction error messages * (unrelated) update library version in example * move extraction code into its own component * remove copy/pasted comments * eats, shoots, and leaves * bin/format should forward arguments to clang-format This is used by bin/check to do a "dry run" format. * default propagation style is now [Datadog, W3C] * heterogeneous extraction, untested * unit test for heterogeneous extraction * missed a spot when moving code into extraction_util * don't lie to your teammates * trade a conditional jump for an addition
1 parent 9781acf commit 4080fa5

File tree

12 files changed

+538
-198
lines changed

12 files changed

+538
-198
lines changed

BUILD.bazel

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@ cc_library(
1111
"src/datadog/default_http_client_null.cpp",
1212
"src/datadog/environment.cpp",
1313
"src/datadog/error.cpp",
14+
"src/datadog/extraction_util.cpp",
1415
"src/datadog/glob.cpp",
1516
"src/datadog/id_generator.cpp",
1617
"src/datadog/limiter.cpp",
@@ -59,6 +60,7 @@ cc_library(
5960
"src/datadog/event_scheduler.h",
6061
"src/datadog/expected.h",
6162
"src/datadog/extracted_data.h",
63+
"src/datadog/extraction_util.h",
6264
"src/datadog/glob.h",
6365
"src/datadog/hex.h",
6466
"src/datadog/http_client.h",

CMakeLists.txt

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -106,6 +106,7 @@ target_sources(dd_trace_cpp-objects PRIVATE
106106
# src/datadog/default_http_client_null.cpp use libcurl
107107
src/datadog/environment.cpp
108108
src/datadog/error.cpp
109+
src/datadog/extraction_util.cpp
109110
src/datadog/glob.cpp
110111
src/datadog/id_generator.cpp
111112
src/datadog/limiter.cpp
@@ -160,6 +161,7 @@ target_sources(dd_trace_cpp-objects PUBLIC
160161
src/datadog/event_scheduler.h
161162
src/datadog/expected.h
162163
src/datadog/extracted_data.h
164+
src/datadog/extraction_util.h
163165
src/datadog/glob.h
164166
src/datadog/hex.h
165167
src/datadog/http_client.h

bin/format

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@ cd "$(dirname "$0")"/..
1111
# occasionally bumps the required version, reformatting everything.
1212
version=14
1313
formatter=clang-format-$version
14-
formatter_options="--style=file -i"
14+
formatter_options="--style=file -i $*"
1515

1616
find_sources() {
1717
find src/ examples/ test/ fuzz/ -type f \( -name '*.h' -o -name '*.cpp' \) "$@"

examples/http-server/server/install-dd-trace-cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@ set -e
55

66
# Adjust for the latest release.
77
# See <https://github.com/DataDog/dd-trace-cpp/releases/latest>.
8-
VERSION_TAG=v0.1.9
8+
VERSION_TAG=v0.1.10
99

1010
cd /tmp
1111
git clone --branch "$VERSION_TAG" 'https://github.com/datadog/dd-trace-cpp'

src/datadog/extracted_data.h

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
#include <vector>
1010

1111
#include "optional.h"
12+
#include "propagation_style.h"
1213
#include "trace_id.h"
1314

1415
namespace datadog {
@@ -33,6 +34,13 @@ struct ExtractedData {
3334
// `additional_datadog_w3c_tracestate` is null.
3435
// `additional_datadog_w3c_tracestate` is used for the `W3C` injection style.
3536
Optional<std::string> additional_datadog_w3c_tracestate;
37+
// `style` is the extraction style used to obtain this `ExtractedData`. It's
38+
// for diagnostics.
39+
Optional<PropagationStyle> style;
40+
// `headers_examined` are the name/value pairs of HTTP headers (or equivalent
41+
// request meta-data) that were looked up and had values during the
42+
// preparation of this `ExtractedData`. It's for diagnostics.
43+
std::vector<std::pair<std::string, std::string>> headers_examined;
3644
};
3745

3846
} // namespace tracing

src/datadog/extraction_util.cpp

Lines changed: 283 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,283 @@
1+
#include "extraction_util.h"
2+
3+
#include <algorithm>
4+
#include <cstdint>
5+
#include <sstream>
6+
#include <string>
7+
#include <unordered_map>
8+
9+
#include "extracted_data.h"
10+
#include "json.hpp"
11+
#include "logger.h"
12+
#include "parse_util.h"
13+
#include "tag_propagation.h"
14+
#include "tags.h"
15+
16+
namespace datadog {
17+
namespace tracing {
18+
19+
// Parse `value` as the high 64 bits of a trace ID, as carried by the
// "_dd.p.tid" trace tag. `value` must be exactly 16 characters long and must
// be accepted by `parse_uint64` in base 16. Return the parsed integer on
// success, or `nullopt` if either requirement is not met.
Optional<std::uint64_t> parse_trace_id_high(const std::string& value) {
  if (value.size() == 16) {
    if (auto parsed = parse_uint64(value, 16)) {
      return *parsed;
    }
  }

  return nullopt;
}
31+
32+
// Decode the encoded `trace_tags` and append to `result.trace_tags` each
// decoded tag whose name begins with "_dd.p.". Tags with other names are
// skipped.
//
// If `trace_tags` cannot be decoded, log the error to `logger`, set the
// `tags::internal::propagation_error` entry of `span_tags` to
// "decoding_error", and leave `result` untouched.
//
// The `tags::internal::trace_id_high` tag (the high 64 bits of the trace ID)
// gets extra handling:
// - If its value is malformed, the `propagation_error` span tag is set to
//   "malformed_tid <value>" and the tag is not appended to
//   `result.trace_tags`.
// - Otherwise, if `result.trace_id` is already set, its `high` part is
//   overwritten with the parsed value. This relies on the caller having
//   extracted the low 64 bits of the trace ID beforehand.
void handle_trace_tags(StringView trace_tags, ExtractedData& result,
                       std::unordered_map<std::string, std::string>& span_tags,
                       Logger& logger) {
  auto decoded = decode_tags(trace_tags);
  if (auto* failure = decoded.if_error()) {
    logger.log_error(*failure);
    span_tags[tags::internal::propagation_error] = "decoding_error";
    return;
  }

  for (auto& [name, text] : *decoded) {
    if (!starts_with(name, "_dd.p.")) {
      continue;
    }

    if (name == tags::internal::trace_id_high) {
      if (const Optional<std::uint64_t> upper = parse_trace_id_high(text)) {
        // Only meaningful once the low 64 bits have been extracted.
        if (result.trace_id) {
          result.trace_id->high = *upper;
        }
      } else {
        // Report the malformed value and drop the tag.
        span_tags[tags::internal::propagation_error] = "malformed_tid " + text;
        continue;
      }
    }

    result.trace_tags.emplace_back(std::move(name), std::move(text));
  }
}
65+
66+
// Look up `header` in `headers` and parse its value as an unsigned 64-bit
// integer in the specified `base`.
//
// - If `header` is absent, return `nullopt` (this is not an error).
// - If the value parses, return the parsed integer.
// - If the value does not parse, return the parse error with a prefix of the
//   form "Could not extract <style_name>-style <header_kind> ID from
//   <header>: <value> ".
//
// `header_kind` names the kind of ID for the error message, e.g. "trace" or
// "parent span". `style_name` names the propagation style for the error
// message, e.g. "Datadog" or "B3".
Expected<Optional<std::uint64_t>> extract_id_header(const DictReader& headers,
                                                    StringView header,
                                                    StringView header_kind,
                                                    StringView style_name,
                                                    int base) {
  auto found = headers.lookup(header);
  if (!found) {
    return nullopt;
  }
  auto result = parse_uint64(*found, base);
  if (auto* error = result.if_error()) {
    std::string prefix;
    prefix += "Could not extract ";
    append(prefix, style_name);
    prefix += "-style ";
    append(prefix, header_kind);
    // The leading space keeps `header_kind` separate from "ID", so that the
    // message reads "trace ID" rather than "traceID".
    prefix += " ID from ";
    append(prefix, header);
    prefix += ": ";
    append(prefix, *found);
    prefix += ' ';
    return error->with_prefix(prefix);
  }
  return *result;
}
91+
92+
// Extract Datadog-style trace context from `headers`. The headers are looked
// up in this order:
//
// - "x-datadog-trace-id": base 10 trace ID (optional)
// - "x-datadog-parent-id": base 10 parent span ID (optional)
// - "x-datadog-sampling-priority": base 10 integer (optional)
// - "x-datadog-origin": copied verbatim (optional)
// - "x-datadog-tags": encoded trace tags, see `handle_trace_tags` (optional)
//
// Return an error if an ID or the sampling priority is present but cannot be
// parsed. Problems with the trace tags are not errors; they are instead
// reported by `handle_trace_tags` via `span_tags` and `logger`.
Expected<ExtractedData> extract_datadog(
    const DictReader& headers,
    std::unordered_map<std::string, std::string>& span_tags, Logger& logger) {
  ExtractedData extracted;
  extracted.style = PropagationStyle::DATADOG;

  // Trace ID. The header supplies the low 64 bits only; the high bits, if
  // any, arrive later via the trace tags.
  auto maybe_trace_id =
      extract_id_header(headers, "x-datadog-trace-id", "trace", "Datadog", 10);
  if (auto* failure = maybe_trace_id.if_error()) {
    return std::move(*failure);
  }
  const Optional<std::uint64_t>& low_bits = *maybe_trace_id;
  if (low_bits) {
    extracted.trace_id = TraceID(*low_bits);
  }

  // Parent span ID.
  auto maybe_parent_id = extract_id_header(headers, "x-datadog-parent-id",
                                           "parent span", "Datadog", 10);
  if (auto* failure = maybe_parent_id.if_error()) {
    return std::move(*failure);
  }
  extracted.parent_id = *maybe_parent_id;

  // Sampling priority.
  const StringView priority_header = "x-datadog-sampling-priority";
  if (auto raw = headers.lookup(priority_header)) {
    auto priority = parse_int(*raw, 10);
    if (auto* failure = priority.if_error()) {
      std::string context;
      context += "Could not extract Datadog-style sampling priority from ";
      append(context, priority_header);
      context += ": ";
      append(context, *raw);
      context += ' ';
      return failure->with_prefix(context);
    }
    extracted.sampling_priority = *priority;
  }

  // Origin.
  if (auto origin = headers.lookup("x-datadog-origin")) {
    extracted.origin = std::string(*origin);
  }

  // Trace tags. This must come after the trace ID, because
  // `handle_trace_tags` fills in the high bits of an existing trace ID.
  if (auto encoded_tags = headers.lookup("x-datadog-tags")) {
    handle_trace_tags(*encoded_tags, extracted, span_tags, logger);
  }

  return extracted;
}
141+
142+
// Extract B3-style (multi-header) trace context from `headers`. The headers
// are looked up in this order:
//
// - "x-b3-traceid": hexadecimal trace ID, parsed by `TraceID::parse_hex`
// - "x-b3-spanid": base 16 parent span ID
// - "x-b3-sampled": base 10 integer, stored as the sampling priority
//
// Each header is optional. Return an error if a header is present but its
// value cannot be parsed. The span tags and logger parameters are unused.
Expected<ExtractedData> extract_b3(
    const DictReader& headers, std::unordered_map<std::string, std::string>&,
    Logger&) {
  ExtractedData extracted;
  extracted.style = PropagationStyle::B3;

  // Trace ID.
  if (auto raw = headers.lookup("x-b3-traceid")) {
    auto trace_id = TraceID::parse_hex(*raw);
    if (auto* failure = trace_id.if_error()) {
      std::string context = "Could not extract B3-style trace ID from \"";
      append(context, *raw);
      context += "\": ";
      return failure->with_prefix(context);
    }
    extracted.trace_id = *trace_id;
  }

  // Parent span ID.
  auto maybe_parent_id =
      extract_id_header(headers, "x-b3-spanid", "parent span", "B3", 16);
  if (auto* failure = maybe_parent_id.if_error()) {
    return std::move(*failure);
  }
  extracted.parent_id = *maybe_parent_id;

  // Sampling decision.
  const StringView sampled_header = "x-b3-sampled";
  if (auto raw = headers.lookup(sampled_header)) {
    auto priority = parse_int(*raw, 10);
    if (auto* failure = priority.if_error()) {
      std::string context;
      context += "Could not extract B3-style sampling priority from ";
      append(context, sampled_header);
      context += ": ";
      append(context, *raw);
      context += ' ';
      return failure->with_prefix(context);
    }
    extracted.sampling_priority = *priority;
  }

  return extracted;
}
183+
184+
// "Extract" trace context in the `NONE` propagation style: examine no
// headers and return an `ExtractedData` in which only `style` is set. All
// parameters are unused; the signature matches that of the other
// `extract_*` functions.
Expected<ExtractedData> extract_none(
    const DictReader&, std::unordered_map<std::string, std::string>&, Logger&) {
  ExtractedData nothing;
  nothing.style = PropagationStyle::NONE;
  return nothing;
}
190+
191+
std::string extraction_error_prefix(
192+
const Optional<PropagationStyle>& style,
193+
const std::vector<std::pair<std::string, std::string>>& headers_examined) {
194+
std::ostringstream stream;
195+
stream << "While extracting trace context";
196+
if (style) {
197+
stream << " in the " << to_json(*style) << " propagation style";
198+
}
199+
auto it = headers_examined.begin();
200+
if (it != headers_examined.end()) {
201+
stream << " from the following headers: [";
202+
stream << nlohmann::json(it->first + ": " + it->second);
203+
for (++it; it != headers_examined.end(); ++it) {
204+
stream << ", ";
205+
stream << nlohmann::json(it->first + ": " + it->second);
206+
}
207+
stream << "]";
208+
}
209+
stream << ", an error occurred: ";
210+
return stream.str();
211+
}
212+
213+
// Create a reader that forwards to the specified `underlying` reader.
// NOTE(review): the `underlying` member is presumably a reference, in which
// case the argument must outlive this object -- confirm against the class
// definition in extraction_util.h.
AuditedReader::AuditedReader(const DictReader& underlying)
    : underlying(underlying) {}
215+
216+
// Forward the lookup of `key` to the underlying reader and return its
// result. If a value was found, first append the key/value pair to
// `entries_found`. Lookups that find nothing are not recorded.
Optional<StringView> AuditedReader::lookup(StringView key) const {
  auto found = underlying.lookup(key);
  if (found.has_value()) {
    entries_found.emplace_back(key, *found);
  }
  return found;
}
223+
224+
void AuditedReader::visit(
225+
const std::function<void(StringView key, StringView value)>& visitor)
226+
const {
227+
underlying.visit([&, this](StringView key, StringView value) {
228+
entries_found.emplace_back(key, value);
229+
visitor(key, value);
230+
});
231+
}
232+
233+
// Combine the specified `contexts`, each the result of extracting trace
// context in one propagation style, into a single `ExtractedData`. The order
// of `contexts` is significant: earlier elements take precedence.
//
// - If no context has a trace ID, return a copy of the first context that
//   has a parent ID, or a default-constructed `ExtractedData` if there is
//   none. Preferring a context with a parent ID allows for the error
//   "extracted a parent ID without a trace ID," if that's what happened.
// - Otherwise, the first context that has a trace ID is the main context.
//   If its style is W3C, return a copy of it.
// - Otherwise, search the contexts after the main one for a W3C-style
//   context with the same trace ID. If there is one, return a copy of the
//   main context with both tracestate fields replaced by those of the W3C
//   context, and with the W3C context's `headers_examined` appended. If
//   there isn't one, return a copy of the main context.
ExtractedData merge(const std::vector<ExtractedData>& contexts) {
  const auto begin = contexts.begin();
  const auto end = contexts.end();

  const auto primary =
      std::find_if(begin, end, [](const ExtractedData& candidate) {
        return candidate.trace_id.has_value();
      });

  if (primary == end) {
    // Nothing extracted a trace ID. Fall back to the first context that has
    // a parent ID, if any.
    ExtractedData fallback;
    const auto orphan =
        std::find_if(begin, end, [](const ExtractedData& candidate) {
          return candidate.parent_id.has_value();
        });
    if (orphan != end) {
      fallback = *orphan;
    }
    return fallback;
  }

  // `primary` is the main context. A W3C main context already carries its
  // own tracestate, so there is nothing to merge into it.
  ExtractedData merged = *primary;
  if (merged.style == PropagationStyle::W3C) {
    return merged;
  }

  // Look among the lower-precedence contexts for W3C tracestate that
  // describes the same trace.
  const auto w3c =
      std::find_if(primary + 1, end, [&](const ExtractedData& candidate) {
        return candidate.style == PropagationStyle::W3C &&
               candidate.trace_id == primary->trace_id;
      });
  if (w3c == end) {
    return merged;
  }

  merged.additional_w3c_tracestate = w3c->additional_w3c_tracestate;
  merged.additional_datadog_w3c_tracestate =
      w3c->additional_datadog_w3c_tracestate;
  merged.headers_examined.insert(merged.headers_examined.end(),
                                 w3c->headers_examined.begin(),
                                 w3c->headers_examined.end());
  return merged;
}
281+
282+
} // namespace tracing
283+
} // namespace datadog

0 commit comments

Comments
 (0)