Skip to content

Commit 0ada79d

Browse files
authored
feat: support Remote Config sampling rules (#116)
Remote Config sampling updates no longer create a new trace sampler, which had the side effect of resetting the rate limiter. - add sampling rules RC support - fix: report telemetry sampling rules - fix: report telemetry rps and sample rate for span - report remote trace sample rate as RULE instead of REMOTE_RULE for legacy reasons - update REMOTE_RULE and REMOTE_ADAPTIVE_RULE values to match the spec - report default sample rate for telemetry - add _dd.psr for new remote rules
1 parent 9fedff2 commit 0ada79d

14 files changed

+239
-90
lines changed

src/datadog/config_manager.cpp

Lines changed: 96 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -6,18 +6,81 @@
66

77
namespace datadog {
88
namespace tracing {
9+
namespace {
10+
11+
using Rules =
12+
std::unordered_map<SpanMatcher, TraceSamplerRate, SpanMatcher::Hash>;
13+
14+
// Parse a remote-configuration trace sampling rules payload into a map from
// span matcher to sampling rate.
//
// `json_rules` must be a JSON array. Each element is handed to
// `SpanMatcher::from_json` and may additionally carry:
//   - "sample_rate": a JSON number accepted by `Rate::from`;
//   - "provenance": a JSON string. "customer" selects
//     `SamplingMechanism::REMOTE_RULE`, "dynamic" selects
//     `SamplingMechanism::REMOTE_ADAPTIVE_RULE`; any other value leaves the
//     mechanism of the default-constructed `TraceSamplerRate` untouched.
//
// Returns the parsed rules, or the first `Error` encountered, with a message
// describing the offending JSON. When several elements yield equal matchers,
// the first one is kept (`emplace` does not overwrite).
Expected<Rules> parse_trace_sampling_rules(const nlohmann::json& json_rules) {
  Rules parsed_rules;

  const std::string rules_type = json_rules.type_name();
  if (rules_type != "array") {
    std::string message =
        "Trace sampling rules must be an array, but JSON value has type \"";
    message += rules_type;
    message += "\": ";
    message += json_rules.dump();
    return Error{Error::TRACE_SAMPLING_RULES_WRONG_TYPE, std::move(message)};
  }

  for (const auto& json_rule : json_rules) {
    auto matcher = SpanMatcher::from_json(json_rule);
    if (auto* error = matcher.if_error()) {
      std::string prefix = "Unable to parse trace sampling rule ";
      prefix += json_rule.dump();
      prefix += ": ";
      return error->with_prefix(prefix);
    }

    TraceSamplerRate rate;
    if (auto sample_rate = json_rule.find("sample_rate");
        sample_rate != json_rule.end()) {
      const std::string rate_type = sample_rate->type_name();
      if (rate_type != "number") {
        std::string message =
            "Trace sampling rule \"sample_rate\" must be a number, but JSON "
            "value has type \"";
        message += rate_type;
        message += "\": ";
        message += sample_rate->dump();
        message += " in rule ";
        message += json_rule.dump();
        return Error{Error::TRACE_SAMPLING_RULES_SAMPLE_RATE_WRONG_TYPE,
                     std::move(message)};
      }

      auto maybe_rate = Rate::from(*sample_rate);
      if (auto* error = maybe_rate.if_error()) {
        std::string prefix =
            "Invalid \"sample_rate\" in trace sampling rule ";
        prefix += json_rule.dump();
        prefix += ": ";
        return error->with_prefix(prefix);
      }

      rate.value = *maybe_rate;
    }

    if (auto provenance_it = json_rule.find("provenance");
        provenance_it != json_rule.cend()) {
      if (!provenance_it->is_string()) {
        // The error code is kept as-is for compatibility with existing
        // callers; the message is what identifies the offending property.
        std::string message =
            "Trace sampling rule \"provenance\" must be a string, but JSON "
            "value has type \"";
        message += provenance_it->type_name();
        message += "\": ";
        message += provenance_it->dump();
        message += " in rule ";
        message += json_rule.dump();
        return Error{Error::TRACE_SAMPLING_RULES_SAMPLE_RATE_WRONG_TYPE,
                     std::move(message)};
      }

      const auto provenance = provenance_it->get<std::string_view>();
      if (provenance == "customer") {
        rate.mechanism = SamplingMechanism::REMOTE_RULE;
      } else if (provenance == "dynamic") {
        rate.mechanism = SamplingMechanism::REMOTE_ADAPTIVE_RULE;
      }
    }

    parsed_rules.emplace(std::move(*matcher), std::move(rate));
  }

  return parsed_rules;
}
69+
70+
} // namespace
971

1072
ConfigManager::ConfigManager(const FinalizedTracerConfig& config)
1173
: clock_(config.clock),
1274
default_metadata_(config.metadata),
1375
trace_sampler_(
1476
std::make_shared<TraceSampler>(config.trace_sampler, clock_)),
77+
rules_(config.trace_sampler.rules),
1578
span_defaults_(std::make_shared<SpanDefaults>(config.defaults)),
1679
report_traces_(config.report_traces) {}
1780

1881
std::shared_ptr<TraceSampler> ConfigManager::trace_sampler() {
1982
std::lock_guard<std::mutex> lock(mutex_);
20-
return trace_sampler_.value();
83+
return trace_sampler_;
2184
}
2285

2386
std::shared_ptr<const SpanDefaults> ConfigManager::span_defaults() {
@@ -35,32 +98,48 @@ std::vector<ConfigMetadata> ConfigManager::update(const ConfigUpdate& conf) {
3598

3699
std::lock_guard<std::mutex> lock(mutex_);
37100

101+
decltype(rules_) rules;
102+
38103
if (!conf.trace_sampling_rate) {
39-
reset_config(ConfigName::TRACE_SAMPLING_RATE, trace_sampler_, metadata);
104+
auto found = default_metadata_.find(ConfigName::TRACE_SAMPLING_RATE);
105+
if (found != default_metadata_.cend()) {
106+
metadata.push_back(found->second);
107+
}
40108
} else {
41109
ConfigMetadata trace_sampling_metadata(
42110
ConfigName::TRACE_SAMPLING_RATE,
43111
to_string(*conf.trace_sampling_rate, 1),
44112
ConfigMetadata::Origin::REMOTE_CONFIG);
45113

46-
TraceSamplerConfig trace_sampler_cfg;
47-
trace_sampler_cfg.sample_rate = *conf.trace_sampling_rate;
114+
auto rate = Rate::from(*conf.trace_sampling_rate);
115+
rules[catch_all] = TraceSamplerRate{*rate, SamplingMechanism::RULE};
116+
117+
metadata.emplace_back(std::move(trace_sampling_metadata));
118+
}
48119

49-
auto finalized_trace_sampler_cfg = finalize_config(trace_sampler_cfg);
50-
if (auto error = finalized_trace_sampler_cfg.if_error()) {
51-
trace_sampling_metadata.error = *error;
120+
if (!conf.trace_sampling_rules) {
121+
auto found = default_metadata_.find(ConfigName::TRACE_SAMPLING_RULES);
122+
if (found != default_metadata_.cend()) {
123+
metadata.emplace_back(found->second);
52124
}
125+
} else {
126+
ConfigMetadata trace_sampling_rules_metadata(
127+
ConfigName::TRACE_SAMPLING_RULES, conf.trace_sampling_rules->dump(),
128+
ConfigMetadata::Origin::REMOTE_CONFIG);
53129

54-
auto trace_sampler =
55-
std::make_shared<TraceSampler>(*finalized_trace_sampler_cfg, clock_);
130+
auto maybe_rules = parse_trace_sampling_rules(*conf.trace_sampling_rules);
131+
if (auto error = maybe_rules.if_error()) {
132+
trace_sampling_rules_metadata.error = std::move(*error);
133+
} else {
134+
rules.merge(*maybe_rules);
135+
}
56136

57-
// This reset rate limiting and `TraceSampler` has no `operator==`.
58-
// TODO: Instead of creating another `TraceSampler`, we should
59-
// update the default sampling rate.
60-
trace_sampler_ = std::move(trace_sampler);
61-
metadata.emplace_back(std::move(trace_sampling_metadata));
137+
metadata.emplace_back(std::move(trace_sampling_rules_metadata));
62138
}
63139

140+
rules.insert(rules_.cbegin(), rules_.cend());
141+
trace_sampler_->set_rules(rules);
142+
64143
if (!conf.tags) {
65144
reset_config(ConfigName::TAGS, span_defaults_, metadata);
66145
} else {
@@ -109,10 +188,9 @@ std::vector<ConfigMetadata> ConfigManager::reset() { return update({}); }
109188

110189
nlohmann::json ConfigManager::config_json() const {
111190
std::lock_guard<std::mutex> lock(mutex_);
112-
return nlohmann::json{
113-
{"defaults", to_json(*span_defaults_.value())},
114-
{"trace_sampler", trace_sampler_.value()->config_json()},
115-
{"report_traces", report_traces_.value()}};
191+
return nlohmann::json{{"defaults", to_json(*span_defaults_.value())},
192+
{"trace_sampler", trace_sampler_->config_json()},
193+
{"report_traces", report_traces_.value()}};
116194
}
117195

118196
} // namespace tracing

src/datadog/config_manager.h

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -54,7 +54,9 @@ class ConfigManager {
5454
Clock clock_;
5555
std::unordered_map<ConfigName, ConfigMetadata> default_metadata_;
5656

57-
DynamicConfig<std::shared_ptr<TraceSampler>> trace_sampler_;
57+
std::shared_ptr<TraceSampler> trace_sampler_;
58+
std::unordered_map<SpanMatcher, TraceSamplerRate, SpanMatcher::Hash> rules_;
59+
5860
DynamicConfig<std::shared_ptr<const SpanDefaults>> span_defaults_;
5961
DynamicConfig<bool> report_traces_;
6062

src/datadog/config_update.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@ struct ConfigUpdate {
1717
Optional<bool> report_traces;
1818
Optional<double> trace_sampling_rate;
1919
Optional<std::vector<StringView>> tags;
20+
const nlohmann::json* trace_sampling_rules = nullptr;
2021
};
2122

2223
} // namespace tracing

src/datadog/remote_config.cpp

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,8 @@ namespace {
3030
enum CapabilitiesFlag : uint64_t {
3131
APM_TRACING_SAMPLE_RATE = 1 << 12,
3232
APM_TRACING_TAGS = 1 << 15,
33-
APM_TRACING_ENABLED = 1 << 19
33+
APM_TRACING_ENABLED = 1 << 19,
34+
APM_TRACING_SAMPLE_RULES = 1 << 29,
3435
};
3536

3637
constexpr std::array<uint8_t, sizeof(uint64_t)> capabilities_byte_array(
@@ -46,7 +47,7 @@ constexpr std::array<uint8_t, sizeof(uint64_t)> capabilities_byte_array(
4647

4748
constexpr std::array<uint8_t, sizeof(uint64_t)> k_apm_capabilities =
4849
capabilities_byte_array(APM_TRACING_SAMPLE_RATE | APM_TRACING_TAGS |
49-
APM_TRACING_ENABLED);
50+
APM_TRACING_ENABLED | APM_TRACING_SAMPLE_RULES);
5051

5152
constexpr StringView k_apm_product = "APM_TRACING";
5253
constexpr StringView k_apm_product_path_substring = "/APM_TRACING/";
@@ -69,6 +70,12 @@ ConfigUpdate parse_dynamic_config(const nlohmann::json& j) {
6970
config_update.report_traces = tracing_enabled_it->get<bool>();
7071
}
7172

73+
if (auto tracing_sampling_rules_it = j.find("tracing_sampling_rules");
74+
tracing_sampling_rules_it != j.cend() &&
75+
tracing_sampling_rules_it->is_array()) {
76+
config_update.trace_sampling_rules = &(*tracing_sampling_rules_it);
77+
}
78+
7279
return config_update;
7380
}
7481

src/datadog/sampling_mechanism.h

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,13 @@ enum class SamplingMechanism {
5858
// Individual span kept by a matching span sampling rule when the enclosing
5959
// trace was dropped.
6060
SPAN_RULE = 8,
61+
// Reserved for future use.
62+
OTLP_RULE = 9,
63+
// Sampling rule configured by user via remote configuration.
64+
REMOTE_RULE = 11,
65+
// Adaptive sampling rule automatically computed by Datadog backend and sent
66+
// via remote configuration.
67+
REMOTE_ADAPTIVE_RULE = 12,
6168
};
6269

6370
} // namespace tracing

src/datadog/span_matcher.h

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,23 @@ struct SpanMatcher {
3232
nlohmann::json to_json() const;
3333

3434
static Expected<SpanMatcher> from_json(const nlohmann::json&);
35+
36+
// Two matchers compare equal when their `service`, `name`, `resource` and
// `tags` all compare equal.
bool operator==(const SpanMatcher& other) const {
  if (!(service == other.service && name == other.name)) {
    return false;
  }
  return resource == other.resource && tags == other.tags;
}
40+
41+
// TODO: add tags
42+
struct Hash {
43+
size_t operator()(const SpanMatcher& rule) const {
44+
return std::hash<std::string>()(rule.service) ^
45+
(std::hash<std::string>()(rule.name) << 1) ^
46+
(std::hash<std::string>()(rule.resource) << 2);
47+
}
48+
};
3549
};
3650

51+
// Default-constructed matcher, used as the map key of the rule derived from
// the remote trace sampling rate.
// NOTE(review): presumably a default-constructed `SpanMatcher` matches every
// span -- confirm against `SpanMatcher`'s member defaults.
// `inline` (C++17) makes this one object shared by every translation unit
// that includes this header, instead of a separate internal-linkage copy per
// translation unit.
inline const SpanMatcher catch_all;
52+
3753
} // namespace tracing
3854
} // namespace datadog

src/datadog/span_sampler_config.cpp

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,12 @@ namespace {
1616
std::string to_string(const std::vector<SpanSamplerConfig::Rule> &rules) {
1717
nlohmann::json res;
1818
for (const auto &r : rules) {
19-
res.emplace_back(r.to_json());
19+
auto j = r.to_json();
20+
j["sample_rate"] = r.sample_rate;
21+
if (r.max_per_second) {
22+
j["max_per_second"] = *r.max_per_second;
23+
}
24+
res.emplace_back(std::move(j));
2025
}
2126

2227
return res.dump();

src/datadog/trace_sampler.cpp

Lines changed: 18 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -21,25 +21,32 @@ TraceSampler::TraceSampler(const FinalizedTraceSamplerConfig& config,
2121
limiter_(clock, config.max_per_second),
2222
limiter_max_per_second_(config.max_per_second) {}
2323

24+
void TraceSampler::set_rules(
25+
std::unordered_map<SpanMatcher, TraceSamplerRate, SpanMatcher::Hash>
26+
rules) {
27+
std::lock_guard lock(mutex_);
28+
rules_ = std::move(rules);
29+
}
30+
2431
SamplingDecision TraceSampler::decide(const SpanData& span) {
2532
SamplingDecision decision;
2633
decision.origin = SamplingDecision::Origin::LOCAL;
2734

2835
// First check sampling rules.
29-
auto found_rule =
30-
std::find_if(rules_.begin(), rules_.end(),
31-
[&](const auto& rule) { return rule.match(span); });
36+
const auto found_rule =
37+
std::find_if(rules_.cbegin(), rules_.cend(),
38+
[&](const auto& it) { return it.first.match(span); });
3239

3340
// `mutex_` protects `limiter_`, `collector_sample_rates_`, and
3441
// `collector_default_sample_rate_`, so let's lock it here.
3542
std::lock_guard lock(mutex_);
3643

3744
if (found_rule != rules_.end()) {
38-
const auto& rule = *found_rule;
39-
decision.mechanism = int(SamplingMechanism::RULE);
45+
const auto& [rule, rate] = *found_rule;
46+
decision.mechanism = int(rate.mechanism);
4047
decision.limiter_max_per_second = limiter_max_per_second_;
41-
decision.configured_rate = rule.sample_rate;
42-
const std::uint64_t threshold = max_id_from_rate(rule.sample_rate);
48+
decision.configured_rate = rate.value;
49+
const std::uint64_t threshold = max_id_from_rate(rate.value);
4350
if (knuth_hash(span.trace_id.low) < threshold) {
4451
const auto result = limiter_.allow();
4552
if (result.allowed) {
@@ -99,8 +106,10 @@ void TraceSampler::handle_collector_response(
99106

100107
nlohmann::json TraceSampler::config_json() const {
101108
std::vector<nlohmann::json> rules;
102-
for (const auto& rule : rules_) {
103-
rules.push_back(to_json(rule));
109+
for (const auto& [rule, rate] : rules_) {
110+
nlohmann::json j = rule.to_json();
111+
j["sampling_rate"] = rate.value.value();
112+
rules.push_back(std::move(j));
104113
}
105114

106115
return nlohmann::json::object({

src/datadog/trace_sampler.h

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -102,18 +102,22 @@ struct SamplingDecision;
102102
struct SpanData;
103103

104104
class TraceSampler {
105+
private:
105106
std::mutex mutex_;
106107

107108
Optional<Rate> collector_default_sample_rate_;
108109
std::unordered_map<std::string, Rate> collector_sample_rates_;
109-
110-
std::vector<FinalizedTraceSamplerConfig::Rule> rules_;
110+
std::unordered_map<SpanMatcher, TraceSamplerRate, SpanMatcher::Hash> rules_;
111111
Limiter limiter_;
112112
double limiter_max_per_second_;
113113

114114
public:
115115
TraceSampler(const FinalizedTraceSamplerConfig& config, const Clock& clock);
116116

117+
void set_rules(
118+
std::unordered_map<SpanMatcher, TraceSamplerRate, SpanMatcher::Hash>
119+
rules);
120+
117121
// Return a sampling decision for the specified root span.
118122
SamplingDecision decide(const SpanData&);
119123

0 commit comments

Comments
 (0)