Skip to content

Commit f590dce

Browse files
authored
perf: parse w3c traceparent using a custom parser (#178)
std::regex is notoriously slow. This custom parser implementation offers significantly better performance.
1 parent 7222d8d commit f590dce

File tree

3 files changed

+90
-59
lines changed

3 files changed

+90
-59
lines changed

fuzz/w3c-propagation/fuzz.cpp

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -10,8 +10,6 @@
1010
#include <cstdlib>
1111
#include <functional>
1212
#include <memory>
13-
#include <string>
14-
#include <unordered_map>
1513

1614
namespace dd = datadog::tracing;
1715

@@ -22,6 +20,7 @@ dd::Tracer& tracer_singleton() {
2220
dd::TracerConfig config;
2321
config.service = "fuzzer";
2422
config.collector = std::make_shared<dd::NullCollector>();
23+
config.extraction_styles = {dd::PropagationStyle::W3C};
2524

2625
const auto finalized_config = dd::finalize_config(config);
2726
if (!finalized_config) {

src/datadog/w3c_propagation.cpp

Lines changed: 75 additions & 55 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,6 @@
55
#include <algorithm>
66
#include <cassert>
77
#include <cstddef>
8-
#include <regex>
98
#include <utility>
109

1110
#include "hex.h"
@@ -17,18 +16,6 @@ namespace datadog {
1716
namespace tracing {
1817
namespace {
1918

20-
// Note that match group 0 is the entire match.
21-
constexpr StringView k_traceparent_pattern =
22-
"([0-9a-f]{2})" // hex version number (match group 1)
23-
"-"
24-
"([0-9a-f]{32})" // hex trace ID (match group 2)
25-
"-"
26-
"([0-9a-f]{16})" // hex parent span ID (match group 3)
27-
"-"
28-
"([0-9a-f]{2})" // hex "trace-flags" (match group 4)
29-
"($|-.*)"; // either the end, or a hyphen preceding further fields (match
30-
// group 5)
31-
3219
// Return a predicate that returns whether its `char` argument is any of the
3320
// following:
3421
//
@@ -49,55 +36,82 @@ auto verboten(int lowest_ascii, int highest_ascii,
4936
// entry of the specified `headers`. Return `nullopt` on success. Return a value
5037
// for the `tags::internal::w3c_extraction_error` tag if an error occurs.
5138
Optional<std::string> extract_traceparent(ExtractedData& result,
52-
const DictReader& headers) {
53-
const auto maybe_traceparent = headers.lookup("traceparent");
54-
if (!maybe_traceparent) {
55-
return nullopt;
39+
StringView traceparent) {
40+
enum class state : char {
41+
version,
42+
trace_id,
43+
parent_span_id,
44+
trace_flags
45+
} internal_state = state::version;
46+
47+
if (traceparent.size() < 55) return "malformed_traceparent";
48+
49+
StringView version;
50+
std::size_t beg = 0;
51+
for (std::size_t i = 0; i < traceparent.size(); ++i) {
52+
switch (internal_state) {
53+
case state::version: {
54+
if (i > 2) return "malformed_traceparent";
55+
if (traceparent[i] == '-') {
56+
version = StringView(traceparent.data() + beg, i - beg);
57+
if (version == "ff") return "invalid_version";
58+
59+
beg = i + 1;
60+
internal_state = state::trace_id;
61+
}
62+
} break;
63+
64+
case state::trace_id: {
65+
if (i > 35) return "malformed_traceparent";
66+
if (traceparent[i] == '-') {
67+
auto maybe_trace_id =
68+
TraceID::parse_hex(StringView(traceparent.data() + beg, i - beg));
69+
if (maybe_trace_id.if_error() || *maybe_trace_id == 0)
70+
return "malformed_traceid";
71+
72+
result.trace_id = *maybe_trace_id;
73+
74+
beg = i + 1;
75+
internal_state = state::parent_span_id;
76+
}
77+
} break;
78+
79+
case state::parent_span_id: {
80+
if (i > 52) return "malformed_traceparent";
81+
if (traceparent[i] == '-') {
82+
auto maybe_parent_id =
83+
parse_uint64(StringView(traceparent.data() + beg, i - beg), 16);
84+
if (maybe_parent_id.if_error() || *maybe_parent_id == 0)
85+
return "malformed_parentid";
86+
87+
result.parent_id = *maybe_parent_id;
88+
89+
beg = i + 1;
90+
internal_state = state::trace_flags;
91+
goto handle_trace_flag;
92+
}
93+
} break;
94+
95+
default:
96+
break;
97+
}
5698
}
5799

58-
const auto traceparent = trim(*maybe_traceparent);
59-
60-
static const std::regex regex{k_traceparent_pattern.data()};
61-
62-
std::cmatch match;
63-
if (!std::regex_match(traceparent.data(),
64-
traceparent.data() + traceparent.size(), match,
65-
regex)) {
100+
if (internal_state != state::trace_flags) {
66101
return "malformed_traceparent";
67102
}
68103

69-
assert(match.ready());
70-
assert(match.size() == 6);
71-
72-
const auto to_string_view = [traceparent_beg = traceparent.data()](
73-
const std::cmatch& match,
74-
const std::size_t index) {
75-
assert(index < match.size());
76-
return StringView(traceparent_beg + match.position(index),
77-
std::size_t(match.length(index)));
78-
};
79-
80-
const auto version = to_string_view(match, 1);
81-
if (version == "ff") {
82-
return "invalid_version";
83-
}
84-
85-
if (version == "00" && !to_string_view(match, 5).empty()) {
104+
handle_trace_flag:
105+
auto left = traceparent.size() - beg;
106+
if (left < 2 ||
107+
(left > 2 && (version == "00" || traceparent[beg + 2] != '-')))
86108
return "malformed_traceparent";
87-
}
88109

89-
result.trace_id = *TraceID::parse_hex(to_string_view(match, 2));
90-
if (result.trace_id == 0) {
91-
return "trace_id_zero";
92-
}
93-
94-
result.parent_id = *parse_uint64(to_string_view(match, 3), 16);
95-
if (*result.parent_id == 0) {
96-
return "parent_id_zero";
97-
}
110+
auto maybe_trace_flags =
111+
parse_uint64(StringView(traceparent.data() + beg, 2), 16);
112+
if (maybe_trace_flags.if_error()) return "malformed_traceflags";
98113

99-
const auto flags = *parse_uint64(to_string_view(match, 4), 16);
100-
result.sampling_priority = int(flags & 1);
114+
result.sampling_priority = *maybe_trace_flags & 0x01;
101115

102116
return nullopt;
103117
}
@@ -284,7 +298,13 @@ Expected<ExtractedData> extract_w3c(
284298
ExtractedData result;
285299
result.style = PropagationStyle::W3C;
286300

287-
if (auto error_tag_value = extract_traceparent(result, headers)) {
301+
const auto maybe_traceparent = headers.lookup("traceparent");
302+
if (!maybe_traceparent) {
303+
return ExtractedData{};
304+
}
305+
306+
if (auto error_tag_value =
307+
extract_traceparent(result, trim(*maybe_traceparent))) {
288308
span_tags[tags::internal::w3c_extraction_error] =
289309
std::move(*error_tag_value);
290310
return ExtractedData{};

test/test_tracer.cpp

Lines changed: 14 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -730,15 +730,27 @@ TEST_CASE("span extraction") {
730730

731731
{__LINE__, "invalid: trace ID zero",
732732
"00-00000000000000000000000000000000-00f067aa0ba902b7-00", // traceparent
733-
"trace_id_zero"}, // expected_error_tag_value
733+
"malformed_traceid"}, // expected_error_tag_value
734734

735735
{__LINE__, "invalid: parent ID zero",
736736
"00-4bf92f3577b34da6a3ce929d0e0e4736-0000000000000000-00", // traceparent
737-
"parent_id_zero"}, // expected_error_tag_value
737+
"malformed_parentid"}, // expected_error_tag_value
738738

739739
{__LINE__, "invalid: trailing characters when version is zero",
740740
"00-4bf92f3577b34da6a3ce929d0e0e4736-00f067aa0ba902b7-00-foo", // traceparent
741741
"malformed_traceparent"}, // expected_error_tag_value
742+
743+
{__LINE__, "invalid: non hex trace ID",
744+
"00-abcdefghijklmnopqrstuvxyzabcdefg-00f067aa0ba902b7-00", // traceparent
745+
"malformed_traceid"}, // expected_error_tag_value
746+
747+
{__LINE__, "invalid: non hex parent ID",
748+
"00-4bf92f3577b34da6a3ce929d0e0e4736-abcdefghijklmnop-00", // traceparent
749+
"malformed_parentid"}, // expected_error_tag_value
750+
751+
{__LINE__, "invalid: non hex trace tag ID",
752+
"00-4bf92f3577b34da6a3ce929d0e0e4736-00f067aa0ba902b7-xy", // traceparent
753+
"malformed_traceflags"}, // expected_error_tag_value
742754
}));
743755
// clang-format on
744756

0 commit comments

Comments
 (0)