Skip to content

Commit f6b9333

Browse files
authored
fix: trace sampling rules order (#125)
Reset trace sampling rules legacy order.
1 parent 0ada79d commit f6b9333

File tree

8 files changed

+103
-88
lines changed

8 files changed

+103
-88
lines changed

src/datadog/config_manager.cpp

Lines changed: 39 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -8,8 +8,7 @@ namespace datadog {
88
namespace tracing {
99
namespace {
1010

11-
using Rules =
12-
std::unordered_map<SpanMatcher, TraceSamplerRate, SpanMatcher::Hash>;
11+
using Rules = std::vector<TraceSamplerRule>;
1312

1413
Expected<Rules> parse_trace_sampling_rules(const nlohmann::json& json_rules) {
1514
Rules parsed_rules;
@@ -27,7 +26,9 @@ Expected<Rules> parse_trace_sampling_rules(const nlohmann::json& json_rules) {
2726
return error->with_prefix(prefix);
2827
}
2928

30-
TraceSamplerRate rate;
29+
TraceSamplerRule rule;
30+
rule.matcher = std::move(*matcher);
31+
3132
if (auto sample_rate = json_rule.find("sample_rate");
3233
sample_rate != json_rule.end()) {
3334
type = sample_rate->type_name();
@@ -42,7 +43,10 @@ Expected<Rules> parse_trace_sampling_rules(const nlohmann::json& json_rules) {
4243
return *error;
4344
}
4445

45-
rate.value = *maybe_rate;
46+
rule.rate = *maybe_rate;
47+
} else {
48+
return Error{Error::TRACE_SAMPLING_RULES_INVALID_JSON,
49+
"Missing \"sample_rate\" field"};
4650
}
4751

4852
if (auto provenance_it = json_rule.find("provenance");
@@ -53,15 +57,21 @@ Expected<Rules> parse_trace_sampling_rules(const nlohmann::json& json_rules) {
5357
std::move(message)};
5458
}
5559

56-
auto provenance = provenance_it->get<std::string_view>();
60+
auto provenance = to_lower(provenance_it->get<StringView>());
5761
if (provenance == "customer") {
58-
rate.mechanism = SamplingMechanism::REMOTE_RULE;
62+
rule.mechanism = SamplingMechanism::REMOTE_RULE;
5963
} else if (provenance == "dynamic") {
60-
rate.mechanism = SamplingMechanism::REMOTE_ADAPTIVE_RULE;
64+
rule.mechanism = SamplingMechanism::REMOTE_ADAPTIVE_RULE;
65+
} else {
66+
return Error{Error::TRACE_SAMPLING_RULES_UNKNOWN_PROPERTY,
67+
"Unknown \"provenance\" value"};
6168
}
69+
} else {
70+
return Error{Error::TRACE_SAMPLING_RULES_INVALID_JSON,
71+
"Missing \"provenance\" field"};
6272
}
6373

64-
parsed_rules.emplace(std::move(*matcher), std::move(rate));
74+
parsed_rules.emplace_back(std::move(rule));
6575
}
6676

6777
return parsed_rules;
@@ -98,7 +108,19 @@ std::vector<ConfigMetadata> ConfigManager::update(const ConfigUpdate& conf) {
98108

99109
std::lock_guard<std::mutex> lock(mutex_);
100110

101-
decltype(rules_) rules;
111+
// NOTE(@dmehala): Sampling rules are generally not well specified.
112+
//
113+
// Rules are evaluated in the order they are inserted, which means the most
114+
// specific matching rule might not be evaluated, even though it should be.
115+
// For now, we must follow this legacy behavior.
116+
//
117+
// Additionally, I exploit this behavior to avoid a merge operation.
118+
// The resulting array can contain duplicate `SpanMatcher`, but only the first
119+
// encountered one will be evaluated, acting as an override.
120+
//
121+
// Remote Configuration rules will/should always be placed at the begining of
122+
// the array, ensuring they are evaluated first.
123+
auto rules = rules_;
102124

103125
if (!conf.trace_sampling_rate) {
104126
auto found = default_metadata_.find(ConfigName::TRACE_SAMPLING_RATE);
@@ -112,7 +134,12 @@ std::vector<ConfigMetadata> ConfigManager::update(const ConfigUpdate& conf) {
112134
ConfigMetadata::Origin::REMOTE_CONFIG);
113135

114136
auto rate = Rate::from(*conf.trace_sampling_rate);
115-
rules[catch_all] = TraceSamplerRate{*rate, SamplingMechanism::RULE};
137+
138+
TraceSamplerRule rule;
139+
rule.rate = *rate;
140+
rule.matcher = catch_all;
141+
rule.mechanism = SamplingMechanism::RULE;
142+
rules.emplace(rules.cbegin(), std::move(rule));
116143

117144
metadata.emplace_back(std::move(trace_sampling_metadata));
118145
}
@@ -131,14 +158,13 @@ std::vector<ConfigMetadata> ConfigManager::update(const ConfigUpdate& conf) {
131158
if (auto error = maybe_rules.if_error()) {
132159
trace_sampling_rules_metadata.error = std::move(*error);
133160
} else {
134-
rules.merge(*maybe_rules);
161+
rules.insert(rules.cbegin(), maybe_rules->begin(), maybe_rules->end());
135162
}
136163

137164
metadata.emplace_back(std::move(trace_sampling_rules_metadata));
138165
}
139166

140-
rules.insert(rules_.cbegin(), rules_.cend());
141-
trace_sampler_->set_rules(rules);
167+
trace_sampler_->set_rules(std::move(rules));
142168

143169
if (!conf.tags) {
144170
reset_config(ConfigName::TAGS, span_defaults_, metadata);

src/datadog/config_manager.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -55,7 +55,7 @@ class ConfigManager {
5555
std::unordered_map<ConfigName, ConfigMetadata> default_metadata_;
5656

5757
std::shared_ptr<TraceSampler> trace_sampler_;
58-
std::unordered_map<SpanMatcher, TraceSamplerRate, SpanMatcher::Hash> rules_;
58+
std::vector<TraceSamplerRule> rules_;
5959

6060
DynamicConfig<std::shared_ptr<const SpanDefaults>> span_defaults_;
6161
DynamicConfig<bool> report_traces_;

src/datadog/span_matcher.h

Lines changed: 0 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -37,15 +37,6 @@ struct SpanMatcher {
3737
return (service == other.service && name == other.name &&
3838
resource == other.resource && tags == other.tags);
3939
}
40-
41-
// TODO: add tags
42-
struct Hash {
43-
size_t operator()(const SpanMatcher& rule) const {
44-
return std::hash<std::string>()(rule.service) ^
45-
(std::hash<std::string>()(rule.name) << 1) ^
46-
(std::hash<std::string>()(rule.resource) << 2);
47-
}
48-
};
4940
};
5041

5142
static const SpanMatcher catch_all;

src/datadog/trace_sampler.cpp

Lines changed: 8 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -21,9 +21,7 @@ TraceSampler::TraceSampler(const FinalizedTraceSamplerConfig& config,
2121
limiter_(clock, config.max_per_second),
2222
limiter_max_per_second_(config.max_per_second) {}
2323

24-
void TraceSampler::set_rules(
25-
std::unordered_map<SpanMatcher, TraceSamplerRate, SpanMatcher::Hash>
26-
rules) {
24+
void TraceSampler::set_rules(std::vector<TraceSamplerRule> rules) {
2725
std::lock_guard lock(mutex_);
2826
rules_ = std::move(rules);
2927
}
@@ -35,18 +33,18 @@ SamplingDecision TraceSampler::decide(const SpanData& span) {
3533
// First check sampling rules.
3634
const auto found_rule =
3735
std::find_if(rules_.cbegin(), rules_.cend(),
38-
[&](const auto& it) { return it.first.match(span); });
36+
[&](const auto& it) { return it.matcher.match(span); });
3937

4038
// `mutex_` protects `limiter_`, `collector_sample_rates_`, and
4139
// `collector_default_sample_rate_`, so let's lock it here.
4240
std::lock_guard lock(mutex_);
4341

4442
if (found_rule != rules_.end()) {
45-
const auto& [rule, rate] = *found_rule;
46-
decision.mechanism = int(rate.mechanism);
43+
const auto& rule = *found_rule;
44+
decision.mechanism = int(rule.mechanism);
4745
decision.limiter_max_per_second = limiter_max_per_second_;
48-
decision.configured_rate = rate.value;
49-
const std::uint64_t threshold = max_id_from_rate(rate.value);
46+
decision.configured_rate = rule.rate;
47+
const std::uint64_t threshold = max_id_from_rate(rule.rate);
5048
if (knuth_hash(span.trace_id.low) < threshold) {
5149
const auto result = limiter_.allow();
5250
if (result.allowed) {
@@ -106,10 +104,8 @@ void TraceSampler::handle_collector_response(
106104

107105
nlohmann::json TraceSampler::config_json() const {
108106
std::vector<nlohmann::json> rules;
109-
for (const auto& [rule, rate] : rules_) {
110-
nlohmann::json j = rule.to_json();
111-
j["sampling_rate"] = rate.value.value();
112-
rules.push_back(std::move(j));
107+
for (const auto& rule : rules_) {
108+
rules.push_back(rule.to_json());
113109
}
114110

115111
return nlohmann::json::object({

src/datadog/trace_sampler.h

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -107,16 +107,14 @@ class TraceSampler {
107107

108108
Optional<Rate> collector_default_sample_rate_;
109109
std::unordered_map<std::string, Rate> collector_sample_rates_;
110-
std::unordered_map<SpanMatcher, TraceSamplerRate, SpanMatcher::Hash> rules_;
110+
std::vector<TraceSamplerRule> rules_;
111111
Limiter limiter_;
112112
double limiter_max_per_second_;
113113

114114
public:
115115
TraceSampler(const FinalizedTraceSamplerConfig& config, const Clock& clock);
116116

117-
void set_rules(
118-
std::unordered_map<SpanMatcher, TraceSamplerRate, SpanMatcher::Hash>
119-
rules);
117+
void set_rules(std::vector<TraceSamplerRule> rules);
120118

121119
// Return a sampling decision for the specified root span.
122120
SamplingDecision decide(const SpanData&);

src/datadog/trace_sampler_config.cpp

Lines changed: 16 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -144,6 +144,12 @@ std::string to_string(const std::vector<TraceSamplerConfig::Rule> &rules) {
144144

145145
} // namespace
146146

147+
nlohmann::json TraceSamplerRule::to_json() const {
148+
auto j = matcher.to_json();
149+
j["sample_rate"] = rate.value();
150+
return j;
151+
}
152+
147153
TraceSamplerConfig::Rule::Rule(const SpanMatcher &base) : SpanMatcher(base) {}
148154

149155
Expected<FinalizedTraceSamplerConfig> finalize_config(
@@ -181,9 +187,11 @@ Expected<FinalizedTraceSamplerConfig> finalize_config(
181187
return error->with_prefix(prefix);
182188
}
183189

184-
SpanMatcher matcher = rule;
185-
result.rules.emplace(
186-
matcher, TraceSamplerRate{*maybe_rate, SamplingMechanism::RULE});
190+
TraceSamplerRule finalized_rule;
191+
finalized_rule.matcher = rule;
192+
finalized_rule.rate = *maybe_rate;
193+
finalized_rule.mechanism = SamplingMechanism::RULE;
194+
result.rules.emplace_back(std::move(finalized_rule));
187195
}
188196

189197
Optional<double> sample_rate;
@@ -213,8 +221,11 @@ Expected<FinalizedTraceSamplerConfig> finalize_config(
213221
"Unable to parse overall sample_rate for trace sampling: ");
214222
}
215223

216-
result.rules.emplace(
217-
catch_all, TraceSamplerRate{*maybe_rate, SamplingMechanism::RULE});
224+
TraceSamplerRule finalized_rule;
225+
finalized_rule.rate = *maybe_rate;
226+
finalized_rule.matcher = catch_all;
227+
finalized_rule.mechanism = SamplingMechanism::RULE;
228+
result.rules.emplace_back(std::move(finalized_rule));
218229
}
219230

220231
const auto [origin, max_per_second] =

src/datadog/trace_sampler_config.h

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -21,9 +21,12 @@
2121
namespace datadog {
2222
namespace tracing {
2323

24-
struct TraceSamplerRate final {
25-
Rate value;
24+
struct TraceSamplerRule final {
25+
Rate rate;
26+
SpanMatcher matcher;
2627
SamplingMechanism mechanism;
28+
29+
nlohmann::json to_json() const;
2730
};
2831

2932
struct TraceSamplerConfig {
@@ -48,8 +51,8 @@ class FinalizedTraceSamplerConfig {
4851

4952
public:
5053
double max_per_second;
54+
std::vector<TraceSamplerRule> rules;
5155
std::unordered_map<ConfigName, ConfigMetadata> metadata;
52-
std::unordered_map<SpanMatcher, TraceSamplerRate, SpanMatcher::Hash> rules;
5356
};
5457

5558
Expected<FinalizedTraceSamplerConfig> finalize_config(

test/test_tracer_config.cpp

Lines changed: 31 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -606,11 +606,11 @@ TEST_CASE("TracerConfig::trace_sampler") {
606606
SECTION("yields one sampling rule") {
607607
auto finalized = finalize_config(config);
608608
REQUIRE(finalized);
609-
REQUIRE(finalized->trace_sampler.rules.count(catch_all));
609+
REQUIRE(finalized->trace_sampler.rules.size() == 1);
610610
// and the default sample_rate is 100%
611-
const auto& rate = finalized->trace_sampler.rules[catch_all];
612-
CHECK(rate.value == 1.0);
613-
CHECK(rate.mechanism == SamplingMechanism::RULE);
611+
const auto& rule = finalized->trace_sampler.rules.front();
612+
CHECK(rule.rate == 1.0);
613+
CHECK(rule.mechanism == SamplingMechanism::RULE);
614614
}
615615

616616
SECTION("has to have a valid sample_rate") {
@@ -631,43 +631,44 @@ TEST_CASE("TracerConfig::trace_sampler") {
631631
rules[1].sample_rate = 0.6;
632632
auto finalized = finalize_config(config);
633633
REQUIRE(finalized);
634-
REQUIRE(finalized->trace_sampler.rules.count(catch_all));
634+
REQUIRE(finalized->trace_sampler.rules.size() == 2);
635635

636-
const auto& rate = finalized->trace_sampler.rules[catch_all];
637-
CHECK(rate.value == 0.5);
638-
CHECK(rate.mechanism == SamplingMechanism::RULE);
636+
const auto& rule = finalized->trace_sampler.rules.front();
637+
CHECK(rule.rate == 0.5);
638+
CHECK(rule.mechanism == SamplingMechanism::RULE);
639639
}
640640

641641
SECTION("global sample_rate creates a catch-all rule") {
642642
config.trace_sampler.sample_rate = 0.25;
643643
auto finalized = finalize_config(config);
644644
REQUIRE(finalized);
645-
REQUIRE(finalized->trace_sampler.rules.count(catch_all));
646-
const auto& rate = finalized->trace_sampler.rules[catch_all];
647-
CHECK(rate.value == 0.25);
648-
CHECK(rate.mechanism == SamplingMechanism::RULE);
645+
REQUIRE(finalized->trace_sampler.rules.size() == 1);
646+
const auto& rule = finalized->trace_sampler.rules.front();
647+
REQUIRE(rule.rate == 0.25);
648+
REQUIRE(rule.matcher.service == "*");
649+
REQUIRE(rule.matcher.name == "*");
650+
REQUIRE(rule.matcher.resource == "*");
651+
REQUIRE(rule.matcher.tags.empty());
649652
}
650653

651654
SECTION("DD_TRACE_SAMPLE_RATE") {
652655
SECTION("sets the global sample_rate") {
653656
const EnvGuard guard{"DD_TRACE_SAMPLE_RATE", "0.5"};
654657
auto finalized = finalize_config(config);
655658
REQUIRE(finalized);
656-
REQUIRE(finalized->trace_sampler.rules.count(catch_all));
657-
const auto& rate = finalized->trace_sampler.rules[catch_all];
658-
CHECK(rate.value == 0.5);
659-
CHECK(rate.mechanism == SamplingMechanism::RULE);
659+
REQUIRE(finalized->trace_sampler.rules.size() == 1);
660+
REQUIRE(finalized->trace_sampler.rules.front().rate == 0.5);
661+
REQUIRE(finalized->trace_sampler.rules.front().mechanism ==
662+
SamplingMechanism::RULE);
660663
}
661664

662665
SECTION("overrides TraceSamplerConfig::sample_rate") {
663666
config.trace_sampler.sample_rate = 0.25;
664667
const EnvGuard guard{"DD_TRACE_SAMPLE_RATE", "0.5"};
665668
auto finalized = finalize_config(config);
666669
REQUIRE(finalized);
667-
REQUIRE(finalized->trace_sampler.rules.count(catch_all));
668-
const auto& rate = finalized->trace_sampler.rules[catch_all];
669-
CHECK(rate.value == 0.5);
670-
CHECK(rate.mechanism == SamplingMechanism::RULE);
670+
REQUIRE(finalized->trace_sampler.rules.size() == 1);
671+
REQUIRE(finalized->trace_sampler.rules.front().rate == 0.5);
671672
}
672673

673674
SECTION("has to have a valid value") {
@@ -799,27 +800,16 @@ TEST_CASE("TracerConfig::trace_sampler") {
799800
CAPTURE(rules_json);
800801
CAPTURE(rules);
801802
REQUIRE(rules.size() == 2);
802-
803-
SpanMatcher matcher;
804-
matcher.service = "poohbear";
805-
matcher.name = "get.honey";
806-
807-
auto found = rules.find(matcher);
808-
REQUIRE(found != rules.cend());
809-
810-
CHECK(found->second.value == 0);
811-
CHECK(found->second.mechanism == SamplingMechanism::RULE);
812-
813-
SpanMatcher matcher2;
814-
matcher2.service = "*";
815-
matcher2.name = "*";
816-
matcher2.tags.emplace("error", "*");
817-
matcher2.resource = "/admin/*";
818-
819-
found = rules.find(matcher2);
820-
REQUIRE(found != rules.cend());
821-
CHECK(found->second.value == 1);
822-
CHECK(found->second.mechanism == SamplingMechanism::RULE);
803+
REQUIRE(rules[0].matcher.service == "poohbear");
804+
REQUIRE(rules[0].matcher.name == "get.honey");
805+
REQUIRE(rules[0].rate == 0);
806+
REQUIRE(rules[0].matcher.tags.size() == 0);
807+
REQUIRE(rules[1].matcher.service == "*");
808+
REQUIRE(rules[1].matcher.name == "*");
809+
REQUIRE(rules[1].rate == 1);
810+
REQUIRE(rules[1].matcher.tags.size() == 1);
811+
REQUIRE(rules[1].matcher.tags.at("error") == "*");
812+
REQUIRE(rules[1].matcher.resource == "/admin/*");
823813
}
824814

825815
SECTION("must be valid") {

0 commit comments

Comments
 (0)