From 47f9942ba9cce99e4f109f76189a12023ef9df03 Mon Sep 17 00:00:00 2001 From: andrewqian2001datadog Date: Wed, 21 Jan 2026 10:36:58 -0500 Subject: [PATCH 01/53] modify the Trace event to include a TraceSampling field, which stores metadata from the sampling transform component --- .../src/data_model/event/trace/mod.rs | 76 ++++++++++++++++++- 1 file changed, 74 insertions(+), 2 deletions(-) diff --git a/lib/saluki-core/src/data_model/event/trace/mod.rs b/lib/saluki-core/src/data_model/event/trace/mod.rs index 30668a66f4..e0d9f28c87 100644 --- a/lib/saluki-core/src/data_model/event/trace/mod.rs +++ b/lib/saluki-core/src/data_model/event/trace/mod.rs @@ -3,6 +3,53 @@ use saluki_common::collections::FastHashMap; use saluki_context::tags::TagSet; use stringtheory::MetaString; + +/// Trace-level sampling metadata. +/// +/// This struct stores sampling-related metadata that applies to the entire trace, +/// typically set by the trace sampler and consumed by the encoder. +#[derive(Clone, Debug, PartialEq)] +pub struct TraceSampling { + /// Whether or not the trace was dropped during sampling. + pub dropped_trace: bool, + /// The sampling priority assigned to this trace. + /// + /// Common values include: + /// - `2`: Manual keep (user-requested) + /// - `1`: Auto keep (sampled in) + /// - `0`: Auto drop (sampled out) + /// - `-1`: Manual drop (user-requested drop) + pub priority: Option, + + /// The decision maker identifier indicating which sampler made the sampling decision. + /// + /// Common values include: + /// - `-9`: Probabilistic sampler + /// - `-4`: Errors sampler + /// - `None`: No decision maker set + pub decision_maker: Option, + + /// The OTLP sampling rate applied to this trace, formatted as a string (e.g., "0.25"). + /// + /// This corresponds to the `_dd.otlp_sr` tag and represents the effective sampling rate + /// from the OTLP ingest path. + pub otlp_sampling_rate: Option, +} + +impl TraceSampling { + /// Creates a new `TraceSampling` instance. 
+ pub fn new( + dropped_trace:bool, priority: Option, decision_maker: Option, otlp_sampling_rate: Option, + ) -> Self { + Self { + dropped_trace, + priority, + decision_maker, + otlp_sampling_rate, + } + } +} + /// A trace event. /// /// A trace is a collection of spans that represent a distributed trace. @@ -14,12 +61,22 @@ pub struct Trace { /// /// This is derived from the resource of the spans and used to construct the tracer payload. resource_tags: TagSet, + /// Trace-level sampling metadata. + /// + /// This field contains sampling decision information (priority, decision maker, rates) + /// that applies to the entire trace. It is set by the trace sampler component and consumed + /// by the encoder to populate trace chunk metadata. + sampling: Option, } impl Trace { /// Creates a new `Trace` with the given spans. pub fn new(spans: Vec, resource_tags: TagSet) -> Self { - Self { spans, resource_tags } + Self { + spans, + resource_tags, + sampling: None, + } } /// Returns a reference to the spans in this trace. @@ -36,6 +93,16 @@ impl Trace { pub fn resource_tags(&self) -> &TagSet { &self.resource_tags } + + /// Returns a reference to the trace-level sampling metadata, if present. + pub fn sampling(&self) -> Option<&TraceSampling> { + self.sampling.as_ref() + } + + /// Sets the trace-level sampling metadata. + pub fn set_sampling(&mut self, sampling: Option) { + self.sampling = sampling; + } } /// A span event. @@ -241,7 +308,7 @@ impl Span { &self.meta } - /// Returns a mutable reference to the string-valued tag map. + /// Returns a mutable reference to the meta map. pub fn meta_mut(&mut self) -> &mut FastHashMap { &mut self.meta } @@ -251,6 +318,11 @@ impl Span { &self.metrics } + /// Returns a mutable reference to the metrics map. + pub fn metrics_mut(&mut self) -> &mut FastHashMap { + &mut self.metrics + } + /// Returns the structured metadata map. 
pub fn meta_struct(&self) -> &FastHashMap> { &self.meta_struct From 704d1b135e71983e367a6c28f7be2090c6758fd3 Mon Sep 17 00:00:00 2001 From: andrewqian2001datadog Date: Wed, 21 Jan 2026 10:37:39 -0500 Subject: [PATCH 02/53] add the probabilistic sampler --- .../transforms/trace_sampler/probabilistic.rs | 48 +++++++++++++++++++ 1 file changed, 48 insertions(+) create mode 100644 lib/saluki-components/src/transforms/trace_sampler/probabilistic.rs diff --git a/lib/saluki-components/src/transforms/trace_sampler/probabilistic.rs b/lib/saluki-components/src/transforms/trace_sampler/probabilistic.rs new file mode 100644 index 0000000000..8bbae09297 --- /dev/null +++ b/lib/saluki-components/src/transforms/trace_sampler/probabilistic.rs @@ -0,0 +1,48 @@ +//! Probabilistic sampling. + +use super::signature::fnv1a_32; + +// Knuth multiplicative hashing factor for deterministic sampling. +// +// This constant is shared across datadog-agent, Datadog libraries, and OpenTelemetry. +// It is currently unused, but kept to mirror the upstream implementation for future work. +#[allow(dead_code)] +const KNUTH_FACTOR: u64 = 1111111111111111111; + +// Probabilistic sampler constants (matching datadog-agent's bucketed sampler). +// These constants exist to match the behavior of the OTEL probabilistic sampler. +// See: https://github.com/open-telemetry/opentelemetry-collector-contrib/.../probabilisticsamplerprocessor/tracesprocessor.go#L38-L42 +const NUM_PROBABILISTIC_BUCKETS: u32 = 0x4000; +const BITMASK_HASH_BUCKETS: u32 = NUM_PROBABILISTIC_BUCKETS - 1; + +/// `probRateKey` indicates the percentage sampling rate configured for the probabilistic sampler. +pub(super) const PROB_RATE_KEY: &str = "_dd.prob_sr"; + +/// Probabilistic sampler. +pub(super) struct ProbabilisticSampler; + +impl ProbabilisticSampler { + /// Deterministically sample a trace based on its trace ID. + /// + /// This mirrors the behavior of the Datadog Agent's bucketed probabilistic sampler. 
+ pub(super) fn sample(trace_id: u64, sampling_rate: f64) -> bool { + // logic taken from here: https://github.com/DataDog/datadog-agent/blob/main/pkg/trace/sampler/probabilistic.go#L62 + // we take in a trace id (randomly generated) and hash/mask it to get a number between 0 and 0x3FFF and compare it to the sampling rate. + // TODO: add full trace id mode (off by default) + + // to match the agent behaviour, we need to make the array 16 bytes long, this is used for full trace id mode + // but we require it now to match the hash. + let mut tid = [0u8; 16]; + tid[..8].copy_from_slice(&trace_id.to_be_bytes()); + + // Match the datadog-agent bucketed probabilistic sampler behavior. + // (Fixed zero hash seed; equivalent to the agent's default when unset.) + // TODO: make the hash seed configurable + let hash_seed = [0u8; 4]; + let hash = fnv1a_32(&hash_seed, &tid); + let scaled_sampling_percentage = (sampling_rate * NUM_PROBABILISTIC_BUCKETS as f64) as u32; + // bitMaskHashBuckets = 0x3FFF (binary: 0011111111111111 = 14 bits set), + // so we keep the lower 14 bits of the hash. + (hash & BITMASK_HASH_BUCKETS) < scaled_sampling_percentage + } +} From b3979ab3fb6e54ea35cfe62caa82d472b4e81814 Mon Sep 17 00:00:00 2001 From: andrewqian2001datadog Date: Wed, 21 Jan 2026 10:38:21 -0500 Subject: [PATCH 03/53] add the error sampler --- .../src/transforms/trace_sampler/errors.rs | 54 +++++++++++++++++++ 1 file changed, 54 insertions(+) create mode 100644 lib/saluki-components/src/transforms/trace_sampler/errors.rs diff --git a/lib/saluki-components/src/transforms/trace_sampler/errors.rs b/lib/saluki-components/src/transforms/trace_sampler/errors.rs new file mode 100644 index 0000000000..b25af304a7 --- /dev/null +++ b/lib/saluki-components/src/transforms/trace_sampler/errors.rs @@ -0,0 +1,54 @@ +//! Error sampling. +//! +//! The error sampler catches traces containing spans with errors, ensuring +//! error visibility even at low sampling rates.
+ +#![allow(dead_code)] + +use std::time::SystemTime; + +use saluki_core::data_model::event::trace::{Span, Trace}; + +use super::score_sampler::{ScoreSampler, ERRORS_RATE_KEY}; + +/// Error sampler for traces. +/// +/// Wraps a ScoreSampler configured specifically for error sampling. +/// This ensures traces with errors are caught even when the main sampler +/// would drop them. +pub(super) struct ErrorsSampler { + score_sampler: ScoreSampler, +} + +impl ErrorsSampler { + /// Create a new ErrorsSampler with the given configuration. + pub(super) fn new(error_tps: f64, extra_sample_rate: f64) -> Self { + let disabled = error_tps == 0.0; + Self { + score_sampler: ScoreSampler::new(ERRORS_RATE_KEY, disabled, error_tps, extra_sample_rate), + } + } + + /// Samples a trace that contains errors, returning whether it should be kept. + /// This method should be called when a trace contains errors and needs to be + /// evaluated by the error sampler. + pub(super) fn sample_error(&mut self, now: SystemTime, trace: &mut Trace, root_idx: usize) -> bool { + // Use the score sampler to make the sampling decision + self.score_sampler.sample(now, trace, root_idx) + } + + /// Set the error sampling rate metric on a span. + pub(super) fn set_sampling_rate_metric(&self, span: &mut Span, rate: f64) { + self.score_sampler.set_sampling_rate_metric(span, rate); + } + + /// Get the target TPS for error sampling. + pub(super) fn get_target_tps(&self) -> f64 { + self.score_sampler.get_target_tps() + } + + /// Update the target TPS for error sampling.
+ pub(super) fn update_target_tps(&mut self, target_tps: f64) { + self.score_sampler.update_target_tps(target_tps); + } +} From 55cdb76d8ce3e1b47a1108a1f81e22dccd623f58 Mon Sep 17 00:00:00 2001 From: andrewqian2001datadog Date: Wed, 21 Jan 2026 10:38:41 -0500 Subject: [PATCH 04/53] add the core sampler --- .../transforms/trace_sampler/core_sampler.rs | 257 ++++++++++++++++++ 1 file changed, 257 insertions(+) create mode 100644 lib/saluki-components/src/transforms/trace_sampler/core_sampler.rs diff --git a/lib/saluki-components/src/transforms/trace_sampler/core_sampler.rs b/lib/saluki-components/src/transforms/trace_sampler/core_sampler.rs new file mode 100644 index 0000000000..b354838ae9 --- /dev/null +++ b/lib/saluki-components/src/transforms/trace_sampler/core_sampler.rs @@ -0,0 +1,257 @@ +#![allow(dead_code)] + +use std::time::{Duration, SystemTime, UNIX_EPOCH}; + +use saluki_common::collections::FastHashMap; +use saluki_common::hash::FastBuildHasher; + +use super::signature::Signature; + +const NUM_BUCKETS: usize = 6; +const BUCKET_DURATION: Duration = Duration::from_secs(5); +const MAX_RATE_INCREASE: f64 = 1.2; + +#[derive(Default)] +pub struct Sampler { + /// maps each Signature to a circular buffer of per-bucket (bucket_id) counts covering the last NUM_BUCKETS * BUCKET_DURATION window. + seen: FastHashMap, + + /// allSigSeen counts all signatures in a circular buffer of NUM_BUCKETS of BUCKET_DURATION + all_sigs_seen: [f32; NUM_BUCKETS], + + last_bucket_id: u64, + + rates: FastHashMap, + + lowest_rate: f64, + + // TODO: add comments for the source code, etc. 
+ target_tps: f64, + + extra_rate: f64, +} + +// zeroAndGetMax zeroes expired buckets and returns the max count +// logic taken from here: https://github.com/DataDog/datadog-agent/blob/main/pkg/trace/sampler/coresampler.go#L185 +fn zero_and_get_max(buckets: &mut [f32; NUM_BUCKETS], previous_bucket: u64, new_bucket: u64) -> f32 { + // A bucket is a BUCKET_DURATION slice (5s) that stores the count of traces that fell in the interval. + // An intuitive understanding of the function is that we start just after previous_buckets and iterate for a full window of buckets (NUM_BUCKETS) + // and zero out any buckets older than new_buckets (expired), then we compute the max_count among the buckets that are in the current window + let mut max_bucket = 0 as f32; + for i in (previous_bucket + 1)..=previous_bucket + NUM_BUCKETS as u64 { + let index = i as usize % NUM_BUCKETS; + // if a complete rotation (time between previous_bucket and new_bucket is more than NUM_BUCKETS * BUCKET_DURATION) happened between previous_bucket and new_bucket + // all buckets will be zeroed + if i < new_bucket { + buckets[index] = 0.0; + continue; + } + let value = buckets[index]; + if value > max_bucket { + max_bucket = value; + } + // zeroing after taking into account the previous value of the bucket + // overridden by this rotation. This allows taking into account all buckets + if i == new_bucket { + buckets[index] = 0.0; + } + } + max_bucket +} + +// compute_tps_per_sig distributes TPS looking at the seen_tps of all signatures. +// By default it spreads uniformly the TPS on all signatures. If a signature +// is low volume and does not use all of its TPS, the remaining is spread uniformly +// on all other signatures.
The returned sig_target is the final per_signature TPS target +// logic taken from here: https://github.com/DataDog/datadog-agent/blob/main/pkg/trace/sampler/coresampler.go#L167 +fn compute_tps_per_sig(target_tps: f64, seen_tps: &Vec) -> f64 { + // Example: target_tps = 30, seen_tps = [5, 10, 100] → sorted stays [5, 10, 100], Initial sig_target = 30 / 3 = 10 + // Loop: + // 1) c = 5 (< 10), so subtract: target_tps = 30 - 5 = 25 + // Recompute sig_target = 25 / 2 = 12.5 + // 2) c = 10 (< 12.5), subtract: target_tps = 25 - 10 = 15 + // Recompute sig_target = 15 / 1 = 15 + // 3) Next is last element, break. + // Return sig_target = 15. + // Interpretation: the low‑volume signatures "use up" 5 and 10 TPS, and the remaining budget (15) is the per‑signature target for the higher‑volume signature(s). + let mut sorted: Vec = seen_tps.clone(); + sorted.sort_by(|a, b| a.total_cmp(b)); + // compute the initial per_signature TPS budget by splitting target_tps across all signatures. + let mut remaining_tps = target_tps; + let mut sig_target = remaining_tps / sorted.len() as f64; + + for (i, c) in sorted.iter().enumerate() { + if *c >= sig_target || i == sorted.len() - 1 { + break; + } + remaining_tps -= c; + sig_target = remaining_tps / (sorted.len() - i - 1) as f64; + } + sig_target +} + +impl Sampler { + pub fn new(extra_rate: f64, target_tps: f64) -> Sampler { + Self { + extra_rate, + target_tps, + ..Default::default() + } + } + + pub(super) fn count_weighted_sig(&mut self, now: SystemTime, signature: &Signature, n: f32) -> bool { + // All traces within the same `BUCKET_DURATION` interval share the same bucket_id + let bucket_id = now.duration_since(UNIX_EPOCH).unwrap_or_default().as_secs() / BUCKET_DURATION.as_secs(); + let prev_bucket_id = self.last_bucket_id; + // If the bucket_id changed then the sliding window advanced and we need to recompute rates + let update_rate = prev_bucket_id != bucket_id; + if update_rate { + self.update_rates(prev_bucket_id, bucket_id); + } + 
+ let buckets = self.seen.entry(*signature).or_insert([0 as f32; NUM_BUCKETS]); + self.all_sigs_seen[(bucket_id % (NUM_BUCKETS as u64)) as usize] += n; + buckets[(bucket_id % (NUM_BUCKETS as u64)) as usize] += n; + return update_rate; + } + + // update_rates distributes TPS on each signature and apply it to the moving + // max of seen buckets. + // Rates increase are bounded by 20% increases, it requires 13 evaluations (1.2**13 = 10.6) + // to increase a sampling rate by 10 fold in about 1min. + fn update_rates(&mut self, previous_bucket: u64, new_bucket: u64) { + let seen_len = self.seen.len(); + if seen_len == 0 { + return; + } + let mut rates: FastHashMap = + FastHashMap::with_capacity_and_hasher(seen_len, FastBuildHasher::default()); + // seen_tps is a vector of per-signature peak rates, we get the maximum bucket value (which represents the number/weight of traces in a BUCKET_DURATION interval) + // in the sliding window and convert that to traces per second. Each element is one TPS per signature. 
+ let mut seen_tps_vec = Vec::with_capacity(seen_len); + let mut sigs = Vec::with_capacity(seen_len); + let mut sigs_to_remove = Vec::new(); + + for (sig, buckets) in self.seen.iter_mut() { + let max_bucket = zero_and_get_max(buckets, previous_bucket, new_bucket); + let seen_tps = max_bucket as f64 / BUCKET_DURATION.as_secs() as f64; + seen_tps_vec.push(seen_tps); + sigs.push(*sig); + } + zero_and_get_max(&mut self.all_sigs_seen, previous_bucket, new_bucket); + let tps_per_sig = compute_tps_per_sig(self.target_tps, &seen_tps_vec); + self.lowest_rate = 1.0; + + for (i, sig) in sigs.iter().enumerate() { + let seen_tps = seen_tps_vec[i]; + let mut rate = 1.0; + if tps_per_sig < seen_tps && seen_tps > 0.0 { + rate = tps_per_sig / seen_tps; + } + + // Cap increase rate to 20% + if let Some(prev_rate) = self.rates.get(sig) { + if *prev_rate != 0.0 && rate / prev_rate > MAX_RATE_INCREASE { + rate = prev_rate * MAX_RATE_INCREASE; + } + } + + // Ensure rate doesn't exceed 1.0 + if rate > 1.0 { + rate = 1.0; + } + + // No traffic on this signature, mark it for cleanup + if rate == 1.0 && seen_tps == 0.0 { + sigs_to_remove.push(*sig); + continue; + } + + // Update lowest rate + if rate < self.lowest_rate { + self.lowest_rate = rate; + } + + rates.insert(*sig, rate); + } + + // Clean up signatures with no traffic + for sig in sigs_to_remove { + self.seen.remove(&sig); + } + + self.rates = rates; + } + + /// Gets the sampling rate for a specific signature. + /// Returns the rate multiplied by the extra rate factor. + pub fn get_signature_sample_rate(&self, sig: &Signature) -> f64 { + self.rates + .get(sig) + .map(|rate| rate * self.extra_rate) + .unwrap_or_else(|| self.default_rate()) + } + + /// Gets all signature sample rates. + /// Returns a tuple of (rates map, default rate). 
+ pub fn get_all_signature_sample_rates(&self) -> (FastHashMap, f64) { + let mut rates = FastHashMap::with_capacity_and_hasher(self.rates.len(), FastBuildHasher::default()); + for (sig, rate) in self.rates.iter() { + rates.insert(*sig, rate * self.extra_rate); + } + (rates, self.default_rate()) + } + + /// Computes the default rate for unknown signatures. + /// Based on the moving max of all signatures seen and the lowest stored rate. + fn default_rate(&self) -> f64 { + if self.target_tps == 0.0 { + return 0.0; + } + + let mut max_seen = 0.0_f32; + for &count in self.all_sigs_seen.iter() { + if count > max_seen { + max_seen = count; + } + } + + let seen_tps = max_seen as f64 / BUCKET_DURATION.as_secs() as f64; + let mut rate = 1.0; + + if self.target_tps < seen_tps && seen_tps > 0.0 { + rate = self.target_tps / seen_tps; + } + + if self.lowest_rate < rate && self.lowest_rate != 0.0 { + return self.lowest_rate; + } + + rate + } + + /// Updates the target TPS and adjusts all existing rates proportionally. + pub fn update_target_tps(&mut self, new_target_tps: f64) { + let previous_target_tps = self.target_tps; + self.target_tps = new_target_tps; + + if previous_target_tps == 0.0 { + return; + } + + let ratio = new_target_tps / previous_target_tps; + for rate in self.rates.values_mut() { + *rate = (*rate * ratio).min(1.0); + } + } + + /// Get the current target TPS. + pub fn get_target_tps(&self) -> f64 { + self.target_tps + } + + /// Returns the number of signatures being tracked. 
+ pub fn size(&self) -> i64 { + self.seen.len() as i64 + } +} From e921c3a183414c4cb3489030cfaa5aa29adbf22f Mon Sep 17 00:00:00 2001 From: andrewqian2001datadog Date: Wed, 21 Jan 2026 10:38:51 -0500 Subject: [PATCH 05/53] add the score sampler --- .../transforms/trace_sampler/score_sampler.rs | 187 ++++++++++++++++++ 1 file changed, 187 insertions(+) create mode 100644 lib/saluki-components/src/transforms/trace_sampler/score_sampler.rs diff --git a/lib/saluki-components/src/transforms/trace_sampler/score_sampler.rs b/lib/saluki-components/src/transforms/trace_sampler/score_sampler.rs new file mode 100644 index 0000000000..70519fd8ac --- /dev/null +++ b/lib/saluki-components/src/transforms/trace_sampler/score_sampler.rs @@ -0,0 +1,187 @@ +use std::time::SystemTime; + +use saluki_common::collections::FastHashMap; +use saluki_core::data_model::event::trace::{Span, Trace}; +use stringtheory::MetaString; + +use super::signature::{compute_signature_with_root_and_env, Signature}; +use crate::transforms::trace_sampler::core_sampler::Sampler; + +// Metric keys for sampling rates +const KEY_SAMPLING_RATE_GLOBAL: &str = "_sample_rate"; +const KEY_SAMPLING_RATE_PRE_SAMPLER: &str = "_dd1.sr.rapre"; + +// ScoreSampler-specific rate keys +pub(super) const ERRORS_RATE_KEY: &str = "_dd.errors_sr"; +#[allow(dead_code)] +const NO_PRIORITY_RATE_KEY: &str = "_dd.no_p_sr"; + +// shrinkCardinality is the max Signature cardinality before shrinking +const SHRINK_CARDINALITY: usize = 200; + +// Constants for deterministic sampling +const MAX_TRACE_ID: u64 = u64::MAX; +const MAX_TRACE_ID_FLOAT: f64 = MAX_TRACE_ID as f64; +// Using a prime number for better distribution +const SAMPLER_HASHER: u64 = 1111111111111111111; + +/// ScoreSampler for traces +/// +/// ScoreSampler samples pieces of traces by computing a signature based on spans (service, name, rsc, http.status, error.type) +/// scoring it and applying a rate. 
+/// The rates are applied on the TraceID to maximize the number of chunks with errors caught for the same traceID. +/// For a set traceID: P(chunk1 kept and chunk2 kept) = min(P(chunk1 kept), P(chunk2 kept)) +pub struct ScoreSampler { + sampler: Sampler, + sampling_rate_key: &'static str, + disabled: bool, + // When shrinking, the shrink allowlist represents the currently active signatures while new ones get collapsed + shrink_allow_list: Option>, +} + +impl ScoreSampler { + /// Create a new ScoreSampler with the given sampling rate key and target TPS. + pub fn new(sampling_rate_key: &'static str, disabled: bool, target_tps: f64, extra_sample_rate: f64) -> Self { + Self { + sampler: Sampler::new(extra_sample_rate, target_tps), + sampling_rate_key, + disabled, + shrink_allow_list: None, + } + } + + /// Sample a trace chunk when you only have a root span index. + /// + /// This avoids borrowing the full trace while holding a mutable root span reference. + pub fn sample(&mut self, now: SystemTime, trace: &mut Trace, root_idx: usize) -> bool { + // logic taken from here: https://github.com/DataDog/datadog-agent/blob/main/pkg/trace/sampler/scoresampler.go#L71 + if self.disabled { + return false; + } + + let spans_len = trace.spans().len(); + if spans_len == 0 || root_idx >= spans_len { + return false; + } + + // Compute signature before mutably borrowing the root span. 
+ let signature = compute_signature_with_root_and_env(trace, root_idx); + let signature = self.shrink(signature); + + // Update sampler state by counting this trace + let weight = { + let spans = trace.spans(); + let root = &spans[root_idx]; + weight_root(root) + }; + self.sampler.count_weighted_sig(now, &signature, weight); + + // Get the sampling rate for this signature + let rate = self.sampler.get_signature_sample_rate(&signature); + + // Apply the sampling decision + let root = &mut trace.spans_mut()[root_idx]; + self.apply_sample_rate(root, rate) + } + + /// Apply the sampling rate to determine if the trace should be kept. + fn apply_sample_rate(&self, root: &mut Span, rate: f64) -> bool { + let initial_rate = get_global_rate(root); + let new_rate = initial_rate * rate; + let trace_id = root.trace_id(); + let sampled = sample_by_rate(trace_id, new_rate); + + if sampled { + self.set_sampling_rate_metric(root, rate); + } + + sampled + } + + /// Shrink limits the number of signatures stored in the sampler. + /// After a cardinality above shrinkCardinality/2 is reached + /// signatures are spread uniformly on a fixed set of values. + /// This ensures that ScoreSamplers are memory capped. + /// When the shrink is triggered, previously active signatures + /// stay unaffected. New signatures may share the same TPS computation. 
+ fn shrink(&mut self, sig: Signature) -> Signature { + // logic taken from here: https://github.com/DataDog/datadog-agent/blob/main/pkg/trace/sampler/scoresampler.go#L151 + if self.sampler.size() < (SHRINK_CARDINALITY / 2) as i64 { + self.shrink_allow_list = None; + return sig; + } + + if self.shrink_allow_list.is_none() { + let (rates, _) = self.sampler.get_all_signature_sample_rates(); + self.shrink_allow_list = Some(rates); + } + + if let Some(ref map) = self.shrink_allow_list { + if map.contains_key(&sig) { + return sig; + } + } + + // Map to a limited set of signatures to bound cardinality and force + // new signatures to share the same bucket and TPS computation. + Signature(sig.0 % (SHRINK_CARDINALITY as u64 / 2)) + } + + /// Get the target TPS for this sampler. + pub fn get_target_tps(&self) -> f64 { + self.sampler.get_target_tps() + } + + /// Update the target TPS for this sampler. + pub fn update_target_tps(&mut self, target_tps: f64) { + self.sampler.update_target_tps(target_tps); + } + + /// Set the sampling rate metric on a span. + pub fn set_sampling_rate_metric(&self, span: &mut Span, rate: f64) { + span.metrics_mut() + .insert(MetaString::from(self.sampling_rate_key), rate); + } +} + +/// Calculate the weight from the span's global rate and presampler rate. +fn weight_root(span: &Span) -> f32 { + let client_rate = span + .metrics() + .get(KEY_SAMPLING_RATE_GLOBAL) + .copied() + .filter(|&r| r > 0.0 && r <= 1.0) + .unwrap_or(1.0); + + let pre_sampler_rate = span + .metrics() + .get(KEY_SAMPLING_RATE_PRE_SAMPLER) + .copied() + .filter(|&r| r > 0.0 && r <= 1.0) + .unwrap_or(1.0); + + (1.0 / (pre_sampler_rate * client_rate)) as f32 +} + +/// Get the cumulative sample rate of the trace to which this span belongs. +fn get_global_rate(span: &Span) -> f64 { + span.metrics().get(KEY_SAMPLING_RATE_GLOBAL).copied().unwrap_or(1.0) +} + +/// SampleByRate returns whether to keep a trace, based on its ID and a sampling rate. 
+/// This assumes that trace IDs are nearly uniformly distributed. +fn sample_by_rate(trace_id: u64, rate: f64) -> bool { + // logic taken from here: https://github.com/DataDog/datadog-agent/blob/angel/support-tail-beginning-wildcard/pkg/trace/sampler/sampler.go#L94 + if rate < 1.0 { + trace_id.wrapping_mul(SAMPLER_HASHER) < (rate * MAX_TRACE_ID_FLOAT) as u64 + } else { + true + } +} + +// TODO: Add SampleV1 method for legacy trace format support +// This would handle the V1 trace format used for backwards compatibility + +// TODO: Add NoPrioritySampler implementation +// NoPrioritySampler is dedicated to catching traces with no priority set. +// It would wrap ScoreSampler similar to ErrorsSampler From b6dac8336c7284e13e6910309b567a7d87c70dff Mon Sep 17 00:00:00 2001 From: andrewqian2001datadog Date: Wed, 21 Jan 2026 10:49:05 -0500 Subject: [PATCH 06/53] add setter for trace spans --- lib/saluki-core/src/data_model/event/trace/mod.rs | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/lib/saluki-core/src/data_model/event/trace/mod.rs b/lib/saluki-core/src/data_model/event/trace/mod.rs index e0d9f28c87..5814489ab5 100644 --- a/lib/saluki-core/src/data_model/event/trace/mod.rs +++ b/lib/saluki-core/src/data_model/event/trace/mod.rs @@ -89,6 +89,11 @@ impl Trace { &mut self.spans } + /// Replaces the spans in this trace with the given spans. + pub fn set_spans(&mut self, spans: Vec) { + self.spans = spans; + } + /// Returns the resource-level tags associated with this trace. 
pub fn resource_tags(&self) -> &TagSet { &self.resource_tags From de45c6d6b551add9c6192ea2769e6f27992694ad Mon Sep 17 00:00:00 2001 From: andrewqian2001datadog Date: Wed, 21 Jan 2026 10:49:25 -0500 Subject: [PATCH 07/53] add trace_sampler to mod.rs --- lib/saluki-components/src/transforms/mod.rs | 3 +++ 1 file changed, 3 insertions(+) diff --git a/lib/saluki-components/src/transforms/mod.rs b/lib/saluki-components/src/transforms/mod.rs index dbeea5720b..e60f774f18 100644 --- a/lib/saluki-components/src/transforms/mod.rs +++ b/lib/saluki-components/src/transforms/mod.rs @@ -21,5 +21,8 @@ pub use self::dogstatsd_mapper::DogstatsDMapperConfiguration; mod metric_router; pub use self::metric_router::MetricRouterConfiguration; +mod trace_sampler; +pub use self::trace_sampler::TraceSamplerConfiguration; + mod apm_stats; pub use self::apm_stats::ApmStatsTransformConfiguration; From 30b0625023b42ea58dde43aaf23ee74138e631bd Mon Sep 17 00:00:00 2001 From: andrewqian2001datadog Date: Wed, 21 Jan 2026 10:50:50 -0500 Subject: [PATCH 08/53] checks for user set sampling in the translator --- .../src/common/otlp/traces/translator.rs | 20 ++++++++++++++++--- 1 file changed, 17 insertions(+), 3 deletions(-) diff --git a/lib/saluki-components/src/common/otlp/traces/translator.rs b/lib/saluki-components/src/common/otlp/traces/translator.rs index 445fc71dad..66d8bd0ad0 100644 --- a/lib/saluki-components/src/common/otlp/traces/translator.rs +++ b/lib/saluki-components/src/common/otlp/traces/translator.rs @@ -3,7 +3,7 @@ use otlp_protos::opentelemetry::proto::resource::v1::Resource as OtlpResource; use otlp_protos::opentelemetry::proto::trace::v1::ResourceSpans; use saluki_common::collections::FastHashMap; use saluki_context::tags::TagSet; -use saluki_core::data_model::event::trace::{Span as DdSpan, Trace}; +use saluki_core::data_model::event::trace::{Span as DdSpan, Trace, TraceSampling}; use saluki_core::data_model::event::Event; use crate::common::otlp::config::TracesConfig; @@ -52,6 
+52,7 @@ impl OtlpTracesTranslator { let resource: OtlpResource = resource_spans.resource.unwrap_or_default(); let resource_tags: TagSet = resource_attributes_to_tagset(&resource.attributes); let mut traces_by_id: FastHashMap> = FastHashMap::default(); + let mut priorities_by_id: FastHashMap = FastHashMap::default(); let ignore_missing_fields = self.config.ignore_missing_datadog_fields; for scope_spans in resource_spans.scope_spans { @@ -67,17 +68,30 @@ impl OtlpTracesTranslator { ignore_missing_fields, self.config.enable_otlp_compute_top_level_by_span_kind, ); + + // Track last-seen priority for this trace (overwrites previous values) + if let Some(&priority) = dd_span.metrics().get("_sampling_priority_v1") { + priorities_by_id.insert(trace_id, priority as i32); + } + traces_by_id.entry(trace_id).or_default().push(dd_span); } } traces_by_id .into_iter() - .filter_map(|(_, spans)| { + .filter_map(|(trace_id, spans)| { if spans.is_empty() { None } else { - Some(Event::Trace(Trace::new(spans, resource_tags.clone()))) + let mut trace = Trace::new(spans, resource_tags.clone()); + + // Set the trace-level sampling priority if one was found + if let Some(&priority) = priorities_by_id.get(&trace_id) { + trace.set_sampling(Some(TraceSampling::new(false, Some(priority), None, None))); + } + + Some(Event::Trace(trace)) } }) .collect() From 8017f4cf54f84673e854fc68c2ef61261dd78f70 Mon Sep 17 00:00:00 2001 From: andrewqian2001datadog Date: Wed, 21 Jan 2026 10:51:17 -0500 Subject: [PATCH 09/53] add the trace sampler transform component --- .../src/transforms/trace_sampler/mod.rs | 957 ++++++++++++++++++ 1 file changed, 957 insertions(+) create mode 100644 lib/saluki-components/src/transforms/trace_sampler/mod.rs diff --git a/lib/saluki-components/src/transforms/trace_sampler/mod.rs b/lib/saluki-components/src/transforms/trace_sampler/mod.rs new file mode 100644 index 0000000000..ddbed6e096 --- /dev/null +++ b/lib/saluki-components/src/transforms/trace_sampler/mod.rs @@ -0,0 
+1,957 @@ +//! Trace sampling transform. +//! +//! This transform implements agent-side head sampling for traces, supporting: +//! - Probabilistic sampling based on trace ID +//! - User-set priority preservation +//! - Error-based sampling as a safety net +//! - OTLP trace ingestion with proper sampling decision handling + +use async_trait::async_trait; +use memory_accounting::{MemoryBounds, MemoryBoundsBuilder}; +use saluki_common::collections::FastHashMap; +use saluki_core::{ + components::{transforms::*, ComponentContext}, + data_model::event::{ + trace::{Span, Trace, TraceSampling}, + Event, EventType, + }, + topology::OutputDefinition, +}; +use saluki_error::GenericError; +use serde::Deserialize; +use stringtheory::MetaString; +use tokio::select; +use tracing::debug; + +mod core_sampler; +mod errors; +mod probabilistic; +mod score_sampler; +mod signature; + +use self::probabilistic::PROB_RATE_KEY; + +// Sampling priority constants (matching datadog-agent) +#[allow(dead_code)] +const PRIORITY_USER_DROP: i32 = -1; +const PRIORITY_AUTO_DROP: i32 = 0; +const PRIORITY_AUTO_KEEP: i32 = 1; +#[allow(dead_code)] +const PRIORITY_USER_KEEP: i32 = 2; + +// TODO: Make error_tps and extra_sample_rate configurable +const ERROR_TPS: f64 = 10.0; // Default error TPS target +const ERROR_SAMPLE_RATE: f64 = 1.0; // Default extra sample rate + +// Sampling metadata keys / values (matching datadog-agent where applicable). +const SAMPLING_PRIORITY_METRIC_KEY: &str = "_sampling_priority_v1"; +const TAG_DECISION_MAKER: &str = "_dd.p.dm"; +const TAG_OTLP_SAMPLING_RATE: &str = "_dd.otlp_sr"; + +// Single Span Sampling and Analytics Events keys +const KEY_SPAN_SAMPLING_MECHANISM: &str = "_dd.span_sampling.mechanism"; +const KEY_ANALYZED_SPANS: &str = "_dd.analyzed"; + +// Decision maker values for `_dd.p.dm` (matching datadog-agent). 
+const DECISION_MAKER_PROBABILISTIC: &str = "-9"; +#[allow(dead_code)] +const DECISION_MAKER_MANUAL_PRIORITY: &str = "-4"; + +enum SamplerName { + Probabilistic, + Error, + Unknown, + // TODO: add NoPriority,Priority,Rare +} + +fn default_sampling_percentage() -> f64 { + 100.0 +} + +fn default_error_sampling_enabled() -> bool { + true +} + +fn default_error_tracking_standalone() -> bool { + false +} + +fn default_probabilistic_sampling_enabled() -> bool { + true +} + +/// Configuration for the trace sampler transform. +#[derive(Debug, Deserialize, Default)] +pub struct TraceSamplerConfiguration { + /// Sampling percentage (0-100). + /// + /// Determines the percentage of traces to keep. A value of 100 keeps all traces, + /// while 50 keeps approximately half. Values outside 0-100 are treated as 100. + /// + /// Defaults to 100.0 (keep all traces). + #[serde(default = "default_sampling_percentage")] + sampling_percentage: f64, + + /// Enable error sampling. + /// + /// When enabled, traces containing errors will be kept even if they would be + /// dropped by probabilistic sampling. This ensures error visibility at low sampling rates. + /// + /// Defaults to `true`. + #[serde(default = "default_error_sampling_enabled")] + error_sampling_enabled: bool, + + /// TODO: implement full functionality, this is just used in an if statement currently. https://github.com/DataDog/datadog-agent/blob/main/pkg/trace/agent/agent.go#L1073-L1080 + #[serde(default = "default_error_tracking_standalone")] + error_tracking_standalone: bool, + + /// Enable probabilistic sampling. + /// + /// When enabled, traces will be sampled probabilistically based on the sampling percentage. + /// + /// Defaults to `true`. 
+ #[serde(default = "default_probabilistic_sampling_enabled")] + probabilistic_sampling_enabled: bool, +} + +#[async_trait] +impl TransformBuilder for TraceSamplerConfiguration { + fn input_event_type(&self) -> EventType { + EventType::Trace + } + + fn outputs(&self) -> &[OutputDefinition] { + static OUTPUTS: &[OutputDefinition] = &[OutputDefinition::default_output(EventType::Trace)]; + OUTPUTS + } + + async fn build(&self, _context: ComponentContext) -> Result, GenericError> { + let sampler = TraceSampler { + sampling_rate: self.sampling_percentage / 100.0, + error_sampling_enabled: self.error_sampling_enabled, + error_tracking_standalone: self.error_tracking_standalone, + probabilistic_sampler_enabled: self.probabilistic_sampling_enabled, + error_sampler: errors::ErrorsSampler::new(ERROR_TPS, ERROR_SAMPLE_RATE), + }; + + Ok(Box::new(sampler)) + } +} + +impl MemoryBounds for TraceSamplerConfiguration { + fn specify_bounds(&self, builder: &mut MemoryBoundsBuilder) { + builder.minimum().with_single_value::("component struct"); + } +} + +pub struct TraceSampler { + sampling_rate: f64, + error_tracking_standalone: bool, + error_sampling_enabled: bool, + probabilistic_sampler_enabled: bool, + error_sampler: errors::ErrorsSampler, +} + +impl TraceSampler { + // TODO: merge this with the other duplicate "find root span of trace" functions + /// Find the root span index of a trace. + fn get_root_span_index(&self, trace: &Trace) -> Option { + // logic taken from here: https://github.com/DataDog/datadog-agent/blob/main/pkg/trace/traceutil/trace.go#L36 + let spans = trace.spans(); + if spans.is_empty() { + return None; + } + let length = spans.len(); + // General case: go over all spans and check for one without a matching parent. 
+ // This intentionally mirrors `datadog-agent/pkg/trace/traceutil/trace.go:GetRoot`: + // - Fast-path: return the last span with `parent_id == 0` (some clients report the root last) + // - Otherwise: build a map of `parent_id -> child_span_index`, delete entries whose parent + // exists in the trace, and pick any remaining "orphan" child span. + let mut parent_id_to_child: FastHashMap = FastHashMap::default(); + + for i in 0..length { + // Common case optimization: check for span with parent_id == 0, starting from the end, + // since some clients report the root last. + let j = length - 1 - i; + if spans[j].parent_id() == 0 { + return Some(j); + } + parent_id_to_child.insert(spans[j].parent_id(), j); + } + + for span in spans.iter() { + parent_id_to_child.remove(&span.span_id()); + } + + // Here, if the trace is valid, we should have `len(parent_id_to_child) == 1`. + if parent_id_to_child.len() != 1 { + debug!( + "Didn't reliably find the root span for traceID:{}", + &spans[0].trace_id() + ); + } + + // Have a safe behavior if that's not the case. + // Pick a random span without its parent. + if let Some((_, child_idx)) = parent_id_to_child.iter().next() { + return Some(*child_idx); + } + + // Gracefully fail with the last span of the trace. + Some(length - 1) + } + + /// Check for user-set sampling priority in trace + fn get_user_priority(&self, trace: &Trace, root_span_idx: usize) -> Option { + // First check trace-level sampling priority (last-seen priority from OTLP ingest) + if let Some(sampling) = trace.sampling() { + if let Some(priority) = sampling.priority { + return Some(priority); + } + } + + if trace.spans().is_empty() { + return None; + } + + // Fall back to checking spans (for compatibility with non-OTLP traces) + // Prefer the root span (common case), but fall back to scanning all spans to be robust to ordering. 
+ if let Some(root) = trace.spans().get(root_span_idx) { + if let Some(&p) = root.metrics().get(SAMPLING_PRIORITY_METRIC_KEY) { + return Some(p as i32); + } + } + let spans = trace.spans(); + spans + .iter() + .find_map(|span| span.metrics().get(SAMPLING_PRIORITY_METRIC_KEY).map(|&p| p as i32)) + } + + /// Returns `true` if the given trace ID should be probabilistically sampled. + fn sample_probabilistic(&self, trace_id: u64) -> bool { + probabilistic::ProbabilisticSampler::sample(trace_id, self.sampling_rate) + } + + /// Returns `true` if the trace contains a span with an error. + fn trace_contains_error(&self, trace: &Trace, consider_exception_span_events: bool) -> bool { + trace.spans().iter().any(|span| { + span.error() != 0 || (consider_exception_span_events && self.span_contains_exception_span_event(span)) + }) + } + + /// Returns `true` if the span has exception span events. + /// + /// This checks for the `_dd.span_events.has_exception` meta field set to `"true"`. + fn span_contains_exception_span_event(&self, span: &Span) -> bool { + if let Some(has_exception) = span.meta().get("_dd.span_events.has_exception") { + return has_exception == "true"; + } + false + } + + /// Check if trace contains spans with Single Span Sampling tags + fn get_single_span_sampled_spans(&self, trace: &Trace) -> Vec { + let mut sampled_spans = Vec::new(); + for span in trace.spans().iter() { + if span.metrics().contains_key(KEY_SPAN_SAMPLING_MECHANISM) { + sampled_spans.push(span.clone()); + } + } + sampled_spans + } + + /// Get spans marked as analyzed (analytics events) + fn get_analyzed_spans(&self, trace: &Trace) -> Vec { + let mut analyzed_spans = Vec::new(); + for span in trace.spans().iter() { + if span.metrics().contains_key(KEY_ANALYZED_SPANS) { + // Keep spans that have the analyzed tag + analyzed_spans.push(span.clone()); + } + } + analyzed_spans + } + + /// Returns `true` if the given trace has any analyzed spans. 
+ fn has_analyzed_spans(&self, trace: &Trace) -> bool { + trace + .spans() + .iter() + .any(|span| span.metrics().contains_key(KEY_ANALYZED_SPANS)) + } + + /// Apply Single Span Sampling to the trace + /// Returns true if the trace was modified + fn single_span_sampling(&self, trace: &mut Trace) -> bool { + let ss_spans = self.get_single_span_sampled_spans(trace); + if !ss_spans.is_empty() { + // Span sampling has kept some spans -> update the trace + trace.set_spans(ss_spans); + // Set high priority and mark as kept + let sampling = TraceSampling::new( + false, + Some(PRIORITY_USER_KEEP), + None, // No decision maker for SSS + Some(MetaString::from(format!("{:.2}", self.sampling_rate))), + ); + trace.set_sampling(Some(sampling)); + true + } else { + false + } + } + + /// Main sampling pipeline - mirrors datadog-agent's runSamplers flow + /// Returns (keep_decision, priority, decision_maker_tag, should_add_prob_rate, root_span_index) + fn run_samplers<'a>(&mut self, trace: &'a mut Trace) -> (bool, i32, &'static str, bool, Option) { + // logic taken from: https://github.com/DataDog/datadog-agent/blob/main/pkg/trace/agent/agent.go#L1066 + let now = std::time::SystemTime::now(); + // Empty trace check + if trace.spans().is_empty() { + return (false, PRIORITY_AUTO_DROP, "", false, None); + } + let contains_error = self.trace_contains_error(trace, false); + let Some(root_span_idx) = self.get_root_span_index(trace) else { + return (false, PRIORITY_AUTO_DROP, "", false, None); + }; + + let mut _sampler_name = SamplerName::Unknown; // TODO: add trace metrics: datadog-agent/pkg/trace/sampler/metrics.go + + // TODO: Error Tracking Standalone mode (ETS) + + // TODO: Run RareSampler early to count signatures + + // Modern path: ProbabilisticSamplerEnabled = true + if self.probabilistic_sampler_enabled { + _sampler_name = SamplerName::Probabilistic; + let mut prob_keep = false; + let mut decision_maker = ""; + let mut should_add_prob_rate = false; + + // TODO: Check if rare 
sampler kept it + + // Run probabilistic sampler - use root span's trace ID + + let root_trace_id = trace.spans()[root_span_idx].trace_id(); + if self.sample_probabilistic(root_trace_id) { + decision_maker = DECISION_MAKER_PROBABILISTIC; // probabilistic sampling + should_add_prob_rate = true; + prob_keep = true; + } else if self.error_sampling_enabled && contains_error { + _sampler_name = SamplerName::Error; + prob_keep = self.error_sampler.sample_error(now, trace, root_span_idx); + } + + let priority = if prob_keep { + PRIORITY_AUTO_KEEP + } else { + PRIORITY_AUTO_DROP + }; + + return ( + prob_keep, + priority, + decision_maker, + should_add_prob_rate, + Some(root_span_idx), + ); + } + + if let Some(user_priority) = self.get_user_priority(trace, root_span_idx) { + if user_priority > 0 { + // User wants to keep this trace + return ( + true, + user_priority, + DECISION_MAKER_MANUAL_PRIORITY, + false, + Some(root_span_idx), + ); + } + } + + // TODO: add missing samplers (priority, no_priority) + + if self.error_sampling_enabled && self.trace_contains_error(trace, false) { + _sampler_name = SamplerName::Error; + let keep = self.error_sampler.sample_error(now, trace, root_span_idx); + if keep { + return (true, PRIORITY_AUTO_KEEP, "", false, Some(root_span_idx)); + } + } + + // Default: drop the trace + (false, PRIORITY_AUTO_DROP, "", false, Some(root_span_idx)) + } + + /// Apply sampling metadata to the trace in-place. + /// + /// The `root_span_id` parameter identifies which span should receive the sampling metadata. + /// This avoids recalculating the root span since it was already found in `run_samplers`. 
+ fn apply_sampling_metadata( + &self, trace: &mut Trace, keep: bool, priority: i32, decision_maker: &str, add_prob_rate: bool, root_span_idx: usize, + ) { + let root_span_value = match trace.spans_mut().get_mut(root_span_idx) { + Some(span) => span, + None => return, + }; + + // Add metrics + let metrics = root_span_value.metrics_mut(); + metrics.insert(MetaString::from(SAMPLING_PRIORITY_METRIC_KEY), priority as f64); + + // Add the probabilistic sampling rate if requested + if add_prob_rate { + metrics.insert(MetaString::from(PROB_RATE_KEY), self.sampling_rate); + } + + // Add tag for the decision maker + let meta = root_span_value.meta_mut(); + if priority > 0 && !decision_maker.is_empty() { + meta.insert(MetaString::from(TAG_DECISION_MAKER), MetaString::from(decision_maker)); + } + + // Add the sampling rate tag for observability + meta.insert( + MetaString::from(TAG_OTLP_SAMPLING_RATE), + MetaString::from(format!("{:.2}", self.sampling_rate)), + ); + + // Now we can use trace again to set sampling metadata + let sampling = TraceSampling::new( + !keep, + Some(priority), + if priority > 0 && !decision_maker.is_empty() { + Some(MetaString::from(decision_maker)) + } else { + None + }, + Some(MetaString::from(format!("{:.2}", self.sampling_rate))), + ); + trace.set_sampling(Some(sampling)); + } +} + +#[async_trait] +impl Transform for TraceSampler { + // run takes `self: Box`, and not &self, so it consumes the `TraceSampler` instance, after run starts there is a single owner of the sampler for the lifetime of the task. This means + // that no internal locking is necessary unlike the agent code referenced. + async fn run(mut self: Box, mut context: TransformContext) -> Result<(), GenericError> { + let mut health = context.take_health_handle(); + health.mark_ready(); + + debug!("Trace sampler transform started."); + + loop { + select! 
{ + _ = health.live() => continue, + maybe_events = context.events().next() => match maybe_events { + Some(events) => { + for event in events { + match event { + Event::Trace(mut trace) => { + // keep is a boolean that indicates if the trace should be kept or dropped + // priority is the sampling priority + // decision_maker is the tag that indicates the decision maker (probabilistic, error, etc.) + // add_prob_rate is a boolean that indicates if the PROB_RATE_KEY should be added to the the root span + // root_span_idx is the index of the root span of the trace + let (keep, priority, decision_maker, add_prob_rate, root_span_idx) = + self.run_samplers(&mut trace); + if keep { + if let Some(root_idx) = root_span_idx { + self.apply_sampling_metadata( + &mut trace, + keep, + priority, + decision_maker, + add_prob_rate, + root_idx, + ); + } + + // Send the trace to the next component + let mut dispatcher = context + .dispatcher() + .buffered() + .expect("default output should always exist"); + dispatcher.push(Event::Trace(trace)).await?; + dispatcher.flush().await?; + } else if !self.error_tracking_standalone { + // logic taken from here: https://github.com/DataDog/datadog-agent/blob/main/pkg/trace/agent/agent.go#L980-L990 + + // try single span sampling (keeps spans marked for sampling when trace would be dropped) + let modified = self.single_span_sampling(&mut trace); + if !modified { + // Fall back to analytics events if no SSS spans + let analyzed_spans = self.get_analyzed_spans(&trace); + if !analyzed_spans.is_empty() { + // Replace trace spans with analyzed events + trace.set_spans(analyzed_spans); + // Mark trace as kept with high priority + let sampling = TraceSampling::new( + false, + Some(PRIORITY_USER_KEEP), + None, + Some(MetaString::from(format!("{:.2}", self.sampling_rate))), + ); + trace.set_sampling(Some(sampling)); + + // Send the modified trace downstream + let mut dispatcher = context + .dispatcher() + .buffered() + .expect("default output should 
always exist"); + dispatcher.push(Event::Trace(trace)).await?; + dispatcher.flush().await?; + continue; // Skip to next event + } + } else if self.has_analyzed_spans(&trace) { + // Warn about both SSS and analytics events + debug!("Detected both analytics events AND single span sampling in the same trace. Single span sampling wins because App Analytics is deprecated."); + + // Send the SSS-modified trace downstream + let mut dispatcher = context + .dispatcher() + .buffered() + .expect("default output should always exist"); + dispatcher.push(Event::Trace(trace)).await?; + dispatcher.flush().await?; + continue; // Skip to next event + } + + // If we modified the trace with SSS, send it + if modified { + let mut dispatcher = context + .dispatcher() + .buffered() + .expect("default output should always exist"); + dispatcher.push(Event::Trace(trace)).await?; + dispatcher.flush().await?; + } else { + // Neither SSS nor analytics events found, drop the trace + debug!("Dropping trace with priority {}", priority); + } + } + } + other => { + // Pass through non-trace events + let mut dispatcher = context + .dispatcher() + .buffered() + .expect("default output should always exist"); + dispatcher.push(other).await?; + dispatcher.flush().await?; + } + } + } + } + None => { + debug!("Event stream terminated, shutting down trace sampler transform"); + break; + } + } + } + } + + debug!("Trace sampler transform stopped."); + Ok(()) + } +} + +#[cfg(test)] +mod tests { + use std::collections::HashMap; + + use saluki_context::tags::TagSet; + use saluki_core::data_model::event::trace::{Span as DdSpan, Trace}; + + use super::*; + + fn create_test_sampler() -> TraceSampler { + TraceSampler { + sampling_rate: 1.0, + error_sampling_enabled: true, + error_tracking_standalone: false, + probabilistic_sampler_enabled: true, + error_sampler: errors::ErrorsSampler::new(10.0, 1.0), + } + } + + fn create_test_span(trace_id: u64, span_id: u64, error: i32) -> DdSpan { + DdSpan::new( + 
MetaString::from("test-service"), + MetaString::from("test-operation"), + MetaString::from("test-resource"), + MetaString::from("test-type"), + trace_id, + span_id, + 0, // parent_id + 0, // start + 1000, // duration + error, + ) + } + + fn create_test_span_with_metrics(trace_id: u64, span_id: u64, metrics: HashMap) -> DdSpan { + let mut metrics_map = saluki_common::collections::FastHashMap::default(); + for (k, v) in metrics { + metrics_map.insert(MetaString::from(k), v); + } + create_test_span(trace_id, span_id, 0).with_metrics(metrics_map) + } + + #[allow(dead_code)] + fn create_test_span_with_meta(trace_id: u64, span_id: u64, meta: HashMap) -> DdSpan { + let mut meta_map = saluki_common::collections::FastHashMap::default(); + for (k, v) in meta { + meta_map.insert(MetaString::from(k), MetaString::from(v)); + } + create_test_span(trace_id, span_id, 0).with_meta(meta_map) + } + + fn create_test_trace(spans: Vec) -> Trace { + let tags = TagSet::default(); + Trace::new(spans, tags) + } + + #[test] + fn test_user_priority_detection() { + let sampler = create_test_sampler(); + + // Test trace with user-set priority = 2 (UserKeep) + let mut metrics = HashMap::new(); + metrics.insert("_sampling_priority_v1".to_string(), 2.0); + let span = create_test_span_with_metrics(12345, 1, metrics); + let trace = create_test_trace(vec![span]); + let root_idx = sampler.get_root_span_index(&trace).unwrap(); + + assert_eq!(sampler.get_user_priority(&trace, root_idx), Some(2)); + + // Test trace with user-set priority = -1 (UserDrop) + let mut metrics = HashMap::new(); + metrics.insert("_sampling_priority_v1".to_string(), -1.0); + let span = create_test_span_with_metrics(12345, 1, metrics); + let trace = create_test_trace(vec![span]); + let root_idx = sampler.get_root_span_index(&trace).unwrap(); + + assert_eq!(sampler.get_user_priority(&trace, root_idx), Some(-1)); + + // Test trace without user priority + let span = create_test_span(12345, 1, 0); + let trace = 
create_test_trace(vec![span]); + let root_idx = sampler.get_root_span_index(&trace).unwrap(); + + assert_eq!(sampler.get_user_priority(&trace, root_idx), None); + } + + #[test] + fn test_trace_level_priority_takes_precedence() { + let sampler = create_test_sampler(); + + // Test trace-level priority overrides span priorities (last-seen priority) + // Create spans with different priorities - root has 0, later span has 2 + let mut metrics_root = HashMap::new(); + metrics_root.insert("_sampling_priority_v1".to_string(), 0.0); + let root_span = create_test_span_with_metrics(12345, 1, metrics_root); + + let mut metrics_later = HashMap::new(); + metrics_later.insert("_sampling_priority_v1".to_string(), 1.0); + let later_span = create_test_span_with_metrics(12345, 2, metrics_later) + .with_parent_id(1); + + let mut trace = create_test_trace(vec![root_span, later_span]); + let root_idx = sampler.get_root_span_index(&trace).unwrap(); + + // Without trace-level priority, should get priority from root (0) + assert_eq!(sampler.get_user_priority(&trace, root_idx), Some(0)); + + // Now set trace-level priority to 2 (simulating last-seen priority from OTLP translator) + trace.set_sampling(Some(TraceSampling::new(false, Some(2), None, None))); + + // Trace-level priority should take precedence + assert_eq!(sampler.get_user_priority(&trace, root_idx), Some(2)); + + // Test that trace-level priority is used even when no span has priority + let span_no_priority = create_test_span(12345, 3, 0); + let mut trace_only_trace_level = create_test_trace(vec![span_no_priority]); + trace_only_trace_level.set_sampling(Some(TraceSampling::new(false, Some(1), None, None))); + let root_idx = sampler.get_root_span_index(&trace_only_trace_level).unwrap(); + + assert_eq!(sampler.get_user_priority(&trace_only_trace_level, root_idx), Some(1)); + } + + #[test] + fn test_manual_keep_with_trace_level_priority() { + let mut sampler = create_test_sampler(); + sampler.probabilistic_sampler_enabled = false; 
// Use legacy path that checks user priority + + // Test that manual keep (priority = 2) works via trace-level priority + let span = create_test_span(12345, 1, 0); + let mut trace = create_test_trace(vec![span]); + trace.set_sampling(Some(TraceSampling::new(false, Some(PRIORITY_USER_KEEP), None, None))); + + let (keep, priority, decision_maker, _, _) = sampler.run_samplers(&mut trace); + assert!(keep); + assert_eq!(priority, PRIORITY_USER_KEEP); + assert_eq!(decision_maker, DECISION_MAKER_MANUAL_PRIORITY); + + // Test manual drop (priority = -1) via trace-level priority + let span = create_test_span(12345, 1, 0); + let mut trace = create_test_trace(vec![span]); + trace.set_sampling(Some(TraceSampling::new(false, Some(PRIORITY_USER_DROP), None, None))); + + let (keep, priority, _, _, _) = sampler.run_samplers(&mut trace); + assert!(!keep); // Should not keep when user drops + assert_eq!(priority, PRIORITY_AUTO_DROP); // Fallthrough to auto-drop + + // Test that priority = 1 (auto keep) via trace-level is also respected + let span = create_test_span(12345, 1, 0); + let mut trace = create_test_trace(vec![span]); + trace.set_sampling(Some(TraceSampling::new(false, Some(PRIORITY_AUTO_KEEP), None, None))); + + let (keep, priority, decision_maker, _, _) = sampler.run_samplers(&mut trace); + assert!(keep); + assert_eq!(priority, PRIORITY_AUTO_KEEP); + assert_eq!(decision_maker, DECISION_MAKER_MANUAL_PRIORITY); + } + + #[test] + fn test_probabilistic_sampling_determinism() { + let sampler = create_test_sampler(); + + // Same trace ID should always produce same decision + let trace_id = 0x1234567890ABCDEF_u64; + let result1 = sampler.sample_probabilistic(trace_id); + let result2 = sampler.sample_probabilistic(trace_id); + assert_eq!(result1, result2); + } + + #[test] + fn test_error_detection() { + let sampler = create_test_sampler(); + + // Test trace with error field set + let span_with_error = create_test_span(12345, 1, 1); + let trace = 
create_test_trace(vec![span_with_error]); + assert!(sampler.trace_contains_error(&trace, false)); + + // Test trace without error + let span_without_error = create_test_span(12345, 1, 0); + let trace = create_test_trace(vec![span_without_error]); + assert!(!sampler.trace_contains_error(&trace, false)); + } + + #[test] + fn test_sampling_priority_order() { + // Test modern path: error sampler overrides probabilistic drop + let mut sampler = create_test_sampler(); + sampler.sampling_rate = 0.5; // 50% sampling rate + sampler.probabilistic_sampler_enabled = true; + + // Create trace with error that would be dropped by probabilistic + // Using a trace ID that we know will be dropped at 50% rate + let span_with_error = create_test_span(u64::MAX - 1, 1, 1); + let mut trace = create_test_trace(vec![span_with_error]); + + let (keep, priority, decision_maker, _, _) = sampler.run_samplers(&mut trace); + assert!(keep); + assert_eq!(priority, PRIORITY_AUTO_KEEP); + assert_eq!(decision_maker, ""); // Error sampler doesn't set decision_maker + + // Test legacy path: user priority is respected + let mut sampler = create_test_sampler(); + sampler.probabilistic_sampler_enabled = false; // Use legacy path + + let mut metrics = HashMap::new(); + metrics.insert("_sampling_priority_v1".to_string(), 2.0); + let span = create_test_span_with_metrics(12345, 1, metrics); + let mut trace = create_test_trace(vec![span]); + + let (keep, priority, decision_maker, _, _) = sampler.run_samplers(&mut trace); + assert!(keep); + assert_eq!(priority, 2); // UserKeep + assert_eq!(decision_maker, DECISION_MAKER_MANUAL_PRIORITY); // manual decision + } + + #[test] + fn test_empty_trace_handling() { + let mut sampler = create_test_sampler(); + let mut trace = create_test_trace(vec![]); + + let (keep, priority, _, _, _) = sampler.run_samplers(&mut trace); + assert!(!keep); + assert_eq!(priority, PRIORITY_AUTO_DROP); + } + + #[test] + fn test_root_span_detection() { + let sampler = create_test_sampler(); + 
+ // Test 1: Root span with parent_id = 0 (common case) + let root_span = DdSpan::new( + MetaString::from("service"), + MetaString::from("operation"), + MetaString::from("resource"), + MetaString::from("type"), + 12345, + 1, + 0, // parent_id = 0 indicates root + 0, + 1000, + 0, + ); + let child_span = DdSpan::new( + MetaString::from("service"), + MetaString::from("child_op"), + MetaString::from("resource"), + MetaString::from("type"), + 12345, + 2, + 1, // parent_id = 1 (points to root) + 100, + 500, + 0, + ); + // Put root span second to test that we find it even when not first + let trace = create_test_trace(vec![child_span.clone(), root_span.clone()]); + let root_idx = sampler.get_root_span_index(&trace).unwrap(); + assert_eq!(trace.spans()[root_idx].span_id(), 1); + + // Test 2: Orphaned span (parent not in trace) + let orphan_span = DdSpan::new( + MetaString::from("service"), + MetaString::from("orphan"), + MetaString::from("resource"), + MetaString::from("type"), + 12345, + 3, + 999, // parent_id = 999 (doesn't exist in trace) + 200, + 300, + 0, + ); + let trace = create_test_trace(vec![orphan_span]); + let root_idx = sampler.get_root_span_index(&trace).unwrap(); + assert_eq!(trace.spans()[root_idx].span_id(), 3); + + // Test 3: Multiple root candidates: should return the last one found (index 1) + let span1 = create_test_span(12345, 1, 0); + let span2 = create_test_span(12345, 2, 0); + let trace = create_test_trace(vec![span1, span2]); + // Both have parent_id = 0, should return the last one found (span_id = 2) + let root_idx = sampler.get_root_span_index(&trace).unwrap(); + assert_eq!(trace.spans()[root_idx].span_id(), 2); + } + + #[test] + fn test_single_span_sampling() { + let mut sampler = create_test_sampler(); + + // Test 1: Trace with SSS tags should be kept even when probabilistic would drop it + sampler.sampling_rate = 0.0; // 0% sampling rate - should drop everything + sampler.probabilistic_sampler_enabled = true; + + // Create span with SSS 
metric + let mut metrics_map = saluki_common::collections::FastHashMap::default(); + metrics_map.insert(MetaString::from(KEY_SPAN_SAMPLING_MECHANISM), 8.0); // Any value + let sss_span = create_test_span(12345, 1, 0).with_metrics(metrics_map.clone()); + + // Create regular span without SSS + let regular_span = create_test_span(12345, 2, 0); + + let mut trace = create_test_trace(vec![sss_span.clone(), regular_span]); + + // Apply SSS + let modified = sampler.single_span_sampling(&mut trace); + assert!(modified); + assert_eq!(trace.spans().len(), 1); // Only SSS span kept + assert_eq!(trace.spans()[0].span_id(), 1); // It's the SSS span + + // Check that trace has been marked as kept with high priority + assert!(trace.sampling().is_some()); + assert_eq!(trace.sampling().as_ref().unwrap().priority, Some(PRIORITY_USER_KEEP)); + + // Test 2: Trace without SSS tags should not be modified + let trace_without_sss = create_test_trace(vec![create_test_span(12345, 3, 0)]); + let mut trace_copy = trace_without_sss.clone(); + let modified = sampler.single_span_sampling(&mut trace_copy); + assert!(!modified); + assert_eq!(trace_copy.spans().len(), trace_without_sss.spans().len()); + } + + #[test] + fn test_analytics_events() { + let sampler = create_test_sampler(); + + // Test 1: Trace with analyzed spans + let mut metrics_map = saluki_common::collections::FastHashMap::default(); + metrics_map.insert(MetaString::from(KEY_ANALYZED_SPANS), 1.0); + let analyzed_span = create_test_span(12345, 1, 0).with_metrics(metrics_map.clone()); + let regular_span = create_test_span(12345, 2, 0); + + let trace = create_test_trace(vec![analyzed_span.clone(), regular_span]); + + let analyzed_spans = sampler.get_analyzed_spans(&trace); + assert_eq!(analyzed_spans.len(), 1); + assert_eq!(analyzed_spans[0].span_id(), 1); + + assert!(sampler.has_analyzed_spans(&trace)); + + // Test 2: Trace without analyzed spans + let trace_no_analytics = create_test_trace(vec![create_test_span(12345, 3, 0)]); + let 
analyzed_spans = sampler.get_analyzed_spans(&trace_no_analytics); + assert!(analyzed_spans.is_empty()); + assert!(!sampler.has_analyzed_spans(&trace_no_analytics)); + } + + #[test] + fn test_probabilistic_sampling_with_prob_rate_key() { + let mut sampler = create_test_sampler(); + sampler.sampling_rate = 0.75; // 75% sampling rate + sampler.probabilistic_sampler_enabled = true; + + // Use a trace ID that we know will be sampled + let trace_id = 12345_u64; + let root_span = DdSpan::new( + MetaString::from("service"), + MetaString::from("operation"), + MetaString::from("resource"), + MetaString::from("type"), + trace_id, + 1, + 0, // parent_id = 0 indicates root + 0, + 1000, + 0, + ); + let mut trace = create_test_trace(vec![root_span]); + + let (keep, priority, decision_maker, add_prob_rate, root_span_idx) = sampler.run_samplers(&mut trace); + + if keep && decision_maker == DECISION_MAKER_PROBABILISTIC { + // If sampled probabilistically, check probRateKey should be added + assert_eq!(priority, PRIORITY_AUTO_KEEP); + assert_eq!(decision_maker, DECISION_MAKER_PROBABILISTIC); // probabilistic sampling marker + assert!(add_prob_rate); // Should add prob_rate_key + + // Use root span index directly + let root_idx = root_span_idx.unwrap_or(0); + + // Test that metadata is applied correctly + let mut trace_with_metadata = trace.clone(); + sampler.apply_sampling_metadata( + &mut trace_with_metadata, + keep, + priority, + decision_maker, + add_prob_rate, + root_idx, + ); + + // Check that the root span has the probRateKey + let modified_root = &trace_with_metadata.spans()[0]; + assert!(modified_root.metrics().contains_key(PROB_RATE_KEY)); + assert_eq!(*modified_root.metrics().get(PROB_RATE_KEY).unwrap(), 0.75); + } + } +} From a32499f6ecbc4bd297fdd9c17701ea55d87070f0 Mon Sep 17 00:00:00 2001 From: andrewqian2001datadog Date: Wed, 21 Jan 2026 10:51:35 -0500 Subject: [PATCH 10/53] add helper class to compute signature --- .../src/transforms/trace_sampler/signature.rs | 99 
+++++++++++++++++++ 1 file changed, 99 insertions(+) create mode 100644 lib/saluki-components/src/transforms/trace_sampler/signature.rs diff --git a/lib/saluki-components/src/transforms/trace_sampler/signature.rs b/lib/saluki-components/src/transforms/trace_sampler/signature.rs new file mode 100644 index 0000000000..29a48d7f97 --- /dev/null +++ b/lib/saluki-components/src/transforms/trace_sampler/signature.rs @@ -0,0 +1,99 @@ +//! Trace signature computation utilities. +//! +//! This module currently provides: +//! - a small FNV-1a 32-bit helper (used by probabilistic sampling) +//! - a signature newtype + compute helper (for score/TPS samplers) + +#![allow(dead_code)] + +use saluki_core::data_model::event::trace::{Span, Trace}; +use stringtheory::MetaString; + +const OFFSET_32: u32 = 2166136261; +const PRIME_32: u32 = 16777619; + +fn write_hash(mut hash: u32, bytes: &[u8]) -> u32 { + for &b in bytes { + hash ^= b as u32; + hash = hash.wrapping_mul(PRIME_32); + } + hash +} + +pub(super) fn fnv1a_32(seed: &[u8], bytes: &[u8]) -> u32 { + const OFFSET_32: u32 = 0x811c_9dc5; + let hash = write_hash(OFFSET_32, seed); + write_hash(hash, bytes) +} + +#[allow(dead_code)] +#[derive(Copy, Clone, Debug, Eq, PartialEq, Hash)] +pub(super) struct Signature(pub(super) u64); + +const KEY_HTTP_STATUS_CODE: &str = "http.status_code"; +const KEY_ERROR_TYPE: &str = "error.type"; + +fn get_trace_env<'a>(trace: &'a Trace, root_span_idx: usize) -> Option<&'a MetaString> { + // logic taken from here: https://github.com/DataDog/datadog-agent/blob/main/pkg/trace/traceutil/trace.go#L19-L20 + let env = trace.spans().get(root_span_idx).and_then(|span| span.meta().get("env")); + match env { + Some(env) => return Some(env), + None => { + for span in trace.spans().iter() { + if let Some(env) = span.meta().get("env"){ + return Some(env) + } + + } + return None; + } + } +} + +pub(super) fn compute_signature_with_root_and_env(trace: &Trace, root_idx: usize) -> Signature { + // Mirrors 
datadog-agent/pkg/trace/sampler/signature.go:computeSignatureWithRootAndEnv. + // + // Signature based on the hash of (env, service, name, resource, is_error) for the root, plus the set of + // (env, service, name, is_error) of each span. + let spans = trace.spans(); + let Some(root) = spans.get(root_idx) else { + return Signature(0); + }; + + let env = get_trace_env(trace, root_idx).map(|v| v.as_ref()).unwrap_or(""); + let root_hash = compute_span_hash(root, env, true); + let mut span_hashes: Vec = spans.iter().map(|span| compute_span_hash(span, env, false)).collect(); + + if span_hashes.is_empty() { + return Signature(root_hash as u64); + } + + // Sort, dedupe then merge all the hashes to build the signature. + span_hashes.sort_unstable(); + span_hashes.dedup(); + + let mut trace_hash = span_hashes[0] ^ root_hash; + for &h in span_hashes.iter().skip(1) { + trace_hash ^= h; + } + + Signature(trace_hash as u64) +} + +fn compute_span_hash(span: &Span, env: &str, with_resource: bool) -> u32 { + let mut h = OFFSET_32; + h = write_hash(h, env.as_bytes()); + h = write_hash(h, span.service().as_bytes()); + h = write_hash(h, span.name().as_bytes()); + h = write_hash(h, &[span.error() as u8]); + if with_resource { + h = write_hash(h, span.resource().as_bytes()); + } + if let Some(code) = span.meta().get(KEY_HTTP_STATUS_CODE) { + h = write_hash(h, code.as_ref().as_bytes()); + } + if let Some(typ) = span.meta().get(KEY_ERROR_TYPE) { + h = write_hash(h, typ.as_ref().as_bytes()); + } + h +} From df459877a4384786a8cb5822dd22589a1ca2d156 Mon Sep 17 00:00:00 2001 From: andrewqian2001datadog Date: Wed, 21 Jan 2026 10:52:22 -0500 Subject: [PATCH 11/53] modified the trace encoder to use sampling info from the transform --- .../src/encoders/datadog/traces/mod.rs | 52 +++++++++++++------ 1 file changed, 35 insertions(+), 17 deletions(-) diff --git a/lib/saluki-components/src/encoders/datadog/traces/mod.rs b/lib/saluki-components/src/encoders/datadog/traces/mod.rs index 
3b4665a468..98231b828d 100644 --- a/lib/saluki-components/src/encoders/datadog/traces/mod.rs +++ b/lib/saluki-components/src/encoders/datadog/traces/mod.rs @@ -62,6 +62,12 @@ const CONTAINER_TAGS_META_KEY: &str = "_dd.tags.container"; const MAX_TRACES_PER_PAYLOAD: usize = 10000; static CONTENT_TYPE_PROTOBUF: HeaderValue = HeaderValue::from_static("application/x-protobuf"); +// Sampling metadata keys / values. +const TAG_OTLP_SAMPLING_RATE: &str = "_dd.otlp_sr"; +const TAG_DECISION_MAKER: &str = "_dd.p.dm"; +const SAMPLING_PRIORITY_METRIC_KEY: &str = "_sampling_priority_v1"; +const DEFAULT_CHUNK_PRIORITY: i32 = 1; // PRIORITY_AUTO_KEEP + fn default_serializer_compressor_kind() -> String { "zstd".to_string() } @@ -547,28 +553,40 @@ impl TraceEndpointEncoder { } fn build_trace_chunk(&self, trace: &Trace) -> TraceChunk { - let mut spans: Vec = trace.spans().iter().map(convert_span).collect(); + let spans: Vec = trace.spans().iter().map(convert_span).collect(); let mut chunk = TraceChunk::new(); - let rate = self.sampling_rate(); let mut tags = std::collections::HashMap::new(); - tags.insert("_dd.otlp_sr".to_string(), format!("{:.2}", rate)); - - // TODO: Remove this once we have sampling. We have to hardcode the priority to 1 for now so that intake does not drop the trace. - const PRIORITY_AUTO_KEEP: i32 = 1; - chunk.set_priority(PRIORITY_AUTO_KEEP); - - // Set _dd.p.dm (decision maker) - // Only set if sampling priority is "keep" (which it is, since we set PRIORITY_AUTO_KEEP) - // Decision maker "-9" indicates probabilistic sampler made the decision - const DECISION_MAKER: &str = "-9"; - if let Some(first_span) = spans.first_mut() { - let mut meta = first_span.take_meta(); - meta.insert("_dd.p.dm".to_string(), DECISION_MAKER.to_string()); - first_span.set_meta(meta); + + // Use trace-level sampling metadata if available (set by the trace sampler transform). + // This provides explicit trace-level sampling information without needing to scan spans. 
+ if let Some(sampling) = trace.sampling() { + // Set priority from trace metadata + chunk.set_priority(sampling.priority.unwrap_or(DEFAULT_CHUNK_PRIORITY)); + chunk.set_droppedTrace(sampling.dropped_trace); + + // Set decision maker tag if present + if let Some(dm) = &sampling.decision_maker { + tags.insert(TAG_DECISION_MAKER.to_string(), dm.to_string()); + } + + // Set OTLP sampling rate tag if present (from sampler) + if let Some(otlp_sr) = &sampling.otlp_sampling_rate { + tags.insert(TAG_OTLP_SAMPLING_RATE.to_string(), otlp_sr.to_string()); + } else { + // Fallback to encoder's computed rate + let rate = self.sampling_rate(); + tags.insert(TAG_OTLP_SAMPLING_RATE.to_string(), format!("{:.2}", rate)); + } + } else { + // Fallback: if trace.sampling is None, use defaults + // (No span scanning per the plan's "no fallback scan" requirement) + chunk.set_priority(DEFAULT_CHUNK_PRIORITY); + chunk.set_droppedTrace(false); + let rate = self.sampling_rate(); + tags.insert(TAG_OTLP_SAMPLING_RATE.to_string(), format!("{:.2}", rate)); } - tags.insert("_dd.p.dm".to_string(), DECISION_MAKER.to_string()); chunk.set_tags(tags); chunk.set_spans(spans); From f5b45ed29a25852ed6041d27cbd50b8b2f8e65c4 Mon Sep 17 00:00:00 2001 From: andrewqian2001datadog Date: Wed, 21 Jan 2026 11:12:43 -0500 Subject: [PATCH 12/53] make check clippy + fmt --- .../transforms/trace_sampler/core_sampler.rs | 6 +++--- .../src/transforms/trace_sampler/mod.rs | 18 +++++++++--------- .../src/transforms/trace_sampler/signature.rs | 11 +++++------ .../src/data_model/event/trace/mod.rs | 5 +++-- 4 files changed, 20 insertions(+), 20 deletions(-) diff --git a/lib/saluki-components/src/transforms/trace_sampler/core_sampler.rs b/lib/saluki-components/src/transforms/trace_sampler/core_sampler.rs index b354838ae9..f23fd6b2f5 100644 --- a/lib/saluki-components/src/transforms/trace_sampler/core_sampler.rs +++ b/lib/saluki-components/src/transforms/trace_sampler/core_sampler.rs @@ -64,7 +64,7 @@ fn 
zero_and_get_max(buckets: &mut [f32; NUM_BUCKETS], previous_bucket: u64, new_ // is low volume and does not use all of its TPS, the remaining is spread uniformly // on all other signatures. The returned sig_target is the final per_signature TPS target // logic taken from here: https://github.com/DataDog/datadog-agent/blob/main/pkg/trace/sampler/coresampler.go#L167 -fn compute_tps_per_sig(target_tps: f64, seen_tps: &Vec) -> f64 { +fn compute_tps_per_sig(target_tps: f64, seen_tps: &[f64]) -> f64 { // Example: target_tps = 30, seen_tps = [5, 10, 100] → sorted stays [5, 10, 100], Initial sig_target = 30 / 3 = 10 // Loop: // 1) c = 5 (< 10), so subtract: target_tps = 30 - 5 = 25 @@ -74,7 +74,7 @@ fn compute_tps_per_sig(target_tps: f64, seen_tps: &Vec) -> f64 { // 3) Next is last element, break. // Return sig_target = 15. // Interpretation: the low‑volume signatures "use up" 5 and 10 TPS, and the remaining budget (15) is the per‑signature target for the higher‑volume signature(s). - let mut sorted: Vec = seen_tps.clone(); + let mut sorted: Vec = seen_tps.to_vec(); sorted.sort_by(|a, b| a.total_cmp(b)); // compute the initial per_signature TPS budget by splitting target_tps across all signatures. 
let mut remaining_tps = target_tps; @@ -112,7 +112,7 @@ impl Sampler { let buckets = self.seen.entry(*signature).or_insert([0 as f32; NUM_BUCKETS]); self.all_sigs_seen[(bucket_id % (NUM_BUCKETS as u64)) as usize] += n; buckets[(bucket_id % (NUM_BUCKETS as u64)) as usize] += n; - return update_rate; + update_rate } // update_rates distributes TPS on each signature and apply it to the moving diff --git a/lib/saluki-components/src/transforms/trace_sampler/mod.rs b/lib/saluki-components/src/transforms/trace_sampler/mod.rs index ddbed6e096..a0cb0151f1 100644 --- a/lib/saluki-components/src/transforms/trace_sampler/mod.rs +++ b/lib/saluki-components/src/transforms/trace_sampler/mod.rs @@ -227,7 +227,7 @@ impl TraceSampler { .find_map(|span| span.metrics().get(SAMPLING_PRIORITY_METRIC_KEY).map(|&p| p as i32)) } - /// Returns `true` if the given trace ID should be probabilistically sampled. + /// Returns `true` if the given trace ID should be probabilistically sampled. fn sample_probabilistic(&self, trace_id: u64) -> bool { probabilistic::ProbabilisticSampler::sample(trace_id, self.sampling_rate) } @@ -247,7 +247,7 @@ impl TraceSampler { return has_exception == "true"; } false - } + } /// Check if trace contains spans with Single Span Sampling tags fn get_single_span_sampled_spans(&self, trace: &Trace) -> Vec { @@ -303,7 +303,7 @@ impl TraceSampler { /// Main sampling pipeline - mirrors datadog-agent's runSamplers flow /// Returns (keep_decision, priority, decision_maker_tag, should_add_prob_rate, root_span_index) - fn run_samplers<'a>(&mut self, trace: &'a mut Trace) -> (bool, i32, &'static str, bool, Option) { + fn run_samplers(&mut self, trace: &mut Trace) -> (bool, i32, &'static str, bool, Option) { // logic taken from: https://github.com/DataDog/datadog-agent/blob/main/pkg/trace/agent/agent.go#L1066 let now = std::time::SystemTime::now(); // Empty trace check @@ -315,7 +315,7 @@ impl TraceSampler { return (false, PRIORITY_AUTO_DROP, "", false, None); }; - let mut 
_sampler_name = SamplerName::Unknown; // TODO: add trace metrics: datadog-agent/pkg/trace/sampler/metrics.go + let mut _sampler_name = SamplerName::Unknown; // TODO: add trace metrics: datadog-agent/pkg/trace/sampler/metrics.go // TODO: Error Tracking Standalone mode (ETS) @@ -389,7 +389,8 @@ impl TraceSampler { /// The `root_span_id` parameter identifies which span should receive the sampling metadata. /// This avoids recalculating the root span since it was already found in `run_samplers`. fn apply_sampling_metadata( - &self, trace: &mut Trace, keep: bool, priority: i32, decision_maker: &str, add_prob_rate: bool, root_span_idx: usize, + &self, trace: &mut Trace, keep: bool, priority: i32, decision_maker: &str, add_prob_rate: bool, + root_span_idx: usize, ) { let root_span_value = match trace.spans_mut().get_mut(root_span_idx) { Some(span) => span, @@ -435,7 +436,7 @@ impl TraceSampler { #[async_trait] impl Transform for TraceSampler { // run takes `self: Box`, and not &self, so it consumes the `TraceSampler` instance, after run starts there is a single owner of the sampler for the lifetime of the task. This means - // that no internal locking is necessary unlike the agent code referenced. + // that no internal locking is necessary unlike the agent code referenced. 
async fn run(mut self: Box, mut context: TransformContext) -> Result<(), GenericError> { let mut health = context.take_health_handle(); health.mark_ready(); @@ -656,8 +657,7 @@ mod tests { let mut metrics_later = HashMap::new(); metrics_later.insert("_sampling_priority_v1".to_string(), 1.0); - let later_span = create_test_span_with_metrics(12345, 2, metrics_later) - .with_parent_id(1); + let later_span = create_test_span_with_metrics(12345, 2, metrics_later).with_parent_id(1); let mut trace = create_test_trace(vec![root_span, later_span]); let root_idx = sampler.get_root_span_index(&trace).unwrap(); @@ -701,7 +701,7 @@ mod tests { trace.set_sampling(Some(TraceSampling::new(false, Some(PRIORITY_USER_DROP), None, None))); let (keep, priority, _, _, _) = sampler.run_samplers(&mut trace); - assert!(!keep); // Should not keep when user drops + assert!(!keep); // Should not keep when user drops assert_eq!(priority, PRIORITY_AUTO_DROP); // Fallthrough to auto-drop // Test that priority = 1 (auto keep) via trace-level is also respected diff --git a/lib/saluki-components/src/transforms/trace_sampler/signature.rs b/lib/saluki-components/src/transforms/trace_sampler/signature.rs index 29a48d7f97..47e9327c68 100644 --- a/lib/saluki-components/src/transforms/trace_sampler/signature.rs +++ b/lib/saluki-components/src/transforms/trace_sampler/signature.rs @@ -33,19 +33,18 @@ pub(super) struct Signature(pub(super) u64); const KEY_HTTP_STATUS_CODE: &str = "http.status_code"; const KEY_ERROR_TYPE: &str = "error.type"; -fn get_trace_env<'a>(trace: &'a Trace, root_span_idx: usize) -> Option<&'a MetaString> { +fn get_trace_env(trace: &Trace, root_span_idx: usize) -> Option<&MetaString> { // logic taken from here: https://github.com/DataDog/datadog-agent/blob/main/pkg/trace/traceutil/trace.go#L19-L20 let env = trace.spans().get(root_span_idx).and_then(|span| span.meta().get("env")); match env { - Some(env) => return Some(env), + Some(env) => Some(env), None => { for span in 
trace.spans().iter() { - if let Some(env) = span.meta().get("env"){ - return Some(env) + if let Some(env) = span.meta().get("env") { + return Some(env); } - } - return None; + None } } } diff --git a/lib/saluki-core/src/data_model/event/trace/mod.rs b/lib/saluki-core/src/data_model/event/trace/mod.rs index 5814489ab5..422581ee41 100644 --- a/lib/saluki-core/src/data_model/event/trace/mod.rs +++ b/lib/saluki-core/src/data_model/event/trace/mod.rs @@ -10,7 +10,7 @@ use stringtheory::MetaString; /// typically set by the trace sampler and consumed by the encoder. #[derive(Clone, Debug, PartialEq)] pub struct TraceSampling { - /// Whether or not the trace was dropped during sampling. + /// Whether or not the trace was dropped during sampling. pub dropped_trace: bool, /// The sampling priority assigned to this trace. /// @@ -39,7 +39,8 @@ pub struct TraceSampling { impl TraceSampling { /// Creates a new `TraceSampling` instance. pub fn new( - dropped_trace:bool, priority: Option, decision_maker: Option, otlp_sampling_rate: Option, + dropped_trace: bool, priority: Option, decision_maker: Option, + otlp_sampling_rate: Option, ) -> Self { Self { dropped_trace, From 76ab423e49cf4a5b15da10a0fd525f93fc5ce449 Mon Sep 17 00:00:00 2001 From: andrewqian2001datadog Date: Thu, 22 Jan 2026 16:41:47 -0500 Subject: [PATCH 13/53] wire up component --- bin/agent-data-plane/src/cli/run.rs | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/bin/agent-data-plane/src/cli/run.rs b/bin/agent-data-plane/src/cli/run.rs index 93c238f53c..394c84413c 100644 --- a/bin/agent-data-plane/src/cli/run.rs +++ b/bin/agent-data-plane/src/cli/run.rs @@ -25,6 +25,7 @@ use saluki_components::{ transforms::{ AggregateConfiguration, ApmStatsTransformConfiguration, ChainedConfiguration, DogstatsDMapperConfiguration, DogstatsDPrefixFilterConfiguration, HostEnrichmentConfiguration, HostTagsConfiguration, + TraceSamplerConfiguration, }, }; use saluki_config::{ConfigurationLoader, 
GenericConfiguration}; @@ -327,6 +328,7 @@ async fn add_baseline_traces_pipeline_to_blueprint( .await?; let dd_traces_enrich_config = ChainedConfiguration::default().with_transform_builder("apm_onboarding", ApmOnboardingConfiguration); + let trace_sampler_config = TraceSamplerConfiguration::default(); let apm_stats_transform_config = ApmStatsTransformConfiguration::from_configuration(config) .error_context("Failed to configure APM Stats transform.")? .with_environment_provider(env_provider.clone()) @@ -338,11 +340,13 @@ async fn add_baseline_traces_pipeline_to_blueprint( blueprint .add_transform("traces_enrich", dd_traces_enrich_config)? + .add_transform("trace_sampler", trace_sampler_config)? .add_transform("dd_apm_stats", apm_stats_transform_config)? .add_encoder("dd_stats_encode", dd_apm_stats_encoder)? .add_encoder("dd_traces_encode", dd_traces_config)? - .connect_component("dd_apm_stats", ["traces_enrich"])? - .connect_component("dd_traces_encode", ["traces_enrich"])? + .connect_component("trace_sampler", ["traces_enrich"])? + .connect_component("dd_apm_stats", ["trace_sampler"])? + .connect_component("dd_traces_encode", ["trace_sampler"])? .connect_component("dd_stats_encode", ["dd_apm_stats"])? 
.connect_component("dd_out", ["dd_traces_encode", "dd_stats_encode"])?; From 321b7f781b208fef88193c2cd03892ddc3a186f7 Mon Sep 17 00:00:00 2001 From: andrewqian2001datadog Date: Thu, 22 Jan 2026 17:43:01 -0500 Subject: [PATCH 14/53] remove default macro which sets probabilistic sampling to false --- .../src/transforms/trace_sampler/mod.rs | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/lib/saluki-components/src/transforms/trace_sampler/mod.rs b/lib/saluki-components/src/transforms/trace_sampler/mod.rs index a0cb0151f1..db01eeab30 100644 --- a/lib/saluki-components/src/transforms/trace_sampler/mod.rs +++ b/lib/saluki-components/src/transforms/trace_sampler/mod.rs @@ -81,7 +81,7 @@ fn default_probabilistic_sampling_enabled() -> bool { } /// Configuration for the trace sampler transform. -#[derive(Debug, Deserialize, Default)] +#[derive(Debug, Deserialize)] pub struct TraceSamplerConfiguration { /// Sampling percentage (0-100). /// @@ -114,6 +114,17 @@ pub struct TraceSamplerConfiguration { probabilistic_sampling_enabled: bool, } +impl Default for TraceSamplerConfiguration { + fn default() -> Self { + Self { + sampling_percentage: default_sampling_percentage(), + error_sampling_enabled: default_error_sampling_enabled(), + error_tracking_standalone: default_error_tracking_standalone(), + probabilistic_sampling_enabled: default_probabilistic_sampling_enabled(), + } + } +} + #[async_trait] impl TransformBuilder for TraceSamplerConfiguration { fn input_event_type(&self) -> EventType { From df98d8f8c8f56ddb2a6b9545f5aec2db3fc09a24 Mon Sep 17 00:00:00 2001 From: andrewqian2001datadog Date: Fri, 23 Jan 2026 09:52:09 -0500 Subject: [PATCH 15/53] remove tag from span which is already added to trace chunk --- lib/saluki-components/src/transforms/trace_sampler/mod.rs | 6 ------ 1 file changed, 6 deletions(-) diff --git a/lib/saluki-components/src/transforms/trace_sampler/mod.rs b/lib/saluki-components/src/transforms/trace_sampler/mod.rs index
db01eeab30..693527e6c2 100644 --- a/lib/saluki-components/src/transforms/trace_sampler/mod.rs +++ b/lib/saluki-components/src/transforms/trace_sampler/mod.rs @@ -46,7 +46,6 @@ const ERROR_SAMPLE_RATE: f64 = 1.0; // Default extra sample rate // Sampling metadata keys / values (matching datadog-agent where applicable). const SAMPLING_PRIORITY_METRIC_KEY: &str = "_sampling_priority_v1"; const TAG_DECISION_MAKER: &str = "_dd.p.dm"; -const TAG_OTLP_SAMPLING_RATE: &str = "_dd.otlp_sr"; // Single Span Sampling and Analytics Events keys const KEY_SPAN_SAMPLING_MECHANISM: &str = "_dd.span_sampling.mechanism"; @@ -423,11 +422,6 @@ impl TraceSampler { meta.insert(MetaString::from(TAG_DECISION_MAKER), MetaString::from(decision_maker)); } - // Add the sampling rate tag for observability - meta.insert( - MetaString::from(TAG_OTLP_SAMPLING_RATE), - MetaString::from(format!("{:.2}", self.sampling_rate)), - ); // Now we can use trace again to set sampling metadata let sampling = TraceSampling::new( From 740b8f7032e3d69f09e02809a530e3e1984a21fd Mon Sep 17 00:00:00 2001 From: andrewqian2001datadog Date: Fri, 23 Jan 2026 09:54:40 -0500 Subject: [PATCH 16/53] make fmt --- lib/saluki-components/src/transforms/trace_sampler/mod.rs | 1 - 1 file changed, 1 deletion(-) diff --git a/lib/saluki-components/src/transforms/trace_sampler/mod.rs b/lib/saluki-components/src/transforms/trace_sampler/mod.rs index 693527e6c2..4f4359b723 100644 --- a/lib/saluki-components/src/transforms/trace_sampler/mod.rs +++ b/lib/saluki-components/src/transforms/trace_sampler/mod.rs @@ -422,7 +422,6 @@ impl TraceSampler { meta.insert(MetaString::from(TAG_DECISION_MAKER), MetaString::from(decision_maker)); } - // Now we can use trace again to set sampling metadata let sampling = TraceSampling::new( !keep, From ecdb386c98e4dfee6f9b401414a23f6d5d898fb9 Mon Sep 17 00:00:00 2001 From: andrewqian2001datadog Date: Fri, 23 Jan 2026 10:00:55 -0500 Subject: [PATCH 17/53] remove tag added during translation process --- 
lib/saluki-components/src/transforms/trace_sampler/mod.rs | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/lib/saluki-components/src/transforms/trace_sampler/mod.rs b/lib/saluki-components/src/transforms/trace_sampler/mod.rs index 4f4359b723..9582ffc4bc 100644 --- a/lib/saluki-components/src/transforms/trace_sampler/mod.rs +++ b/lib/saluki-components/src/transforms/trace_sampler/mod.rs @@ -407,12 +407,9 @@ impl TraceSampler { None => return, }; - // Add metrics - let metrics = root_span_value.metrics_mut(); - metrics.insert(MetaString::from(SAMPLING_PRIORITY_METRIC_KEY), priority as f64); - // Add the probabilistic sampling rate if requested if add_prob_rate { + let metrics = root_span_value.metrics_mut(); metrics.insert(MetaString::from(PROB_RATE_KEY), self.sampling_rate); } From 91254c7c342749bb458583f2475d420b085ce37a Mon Sep 17 00:00:00 2001 From: andrewqian2001datadog Date: Fri, 23 Jan 2026 10:08:43 -0500 Subject: [PATCH 18/53] disable tag when probabilistic sampler isn't being used --- lib/saluki-components/src/transforms/trace_sampler/mod.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/saluki-components/src/transforms/trace_sampler/mod.rs b/lib/saluki-components/src/transforms/trace_sampler/mod.rs index 9582ffc4bc..588844d648 100644 --- a/lib/saluki-components/src/transforms/trace_sampler/mod.rs +++ b/lib/saluki-components/src/transforms/trace_sampler/mod.rs @@ -371,7 +371,7 @@ impl TraceSampler { if user_priority > 0 { // User wants to keep this trace return ( - true, + false, user_priority, DECISION_MAKER_MANUAL_PRIORITY, false, @@ -386,7 +386,7 @@ impl TraceSampler { _sampler_name = SamplerName::Error; let keep = self.error_sampler.sample_error(now, trace, root_span_idx); if keep { - return (true, PRIORITY_AUTO_KEEP, "", false, Some(root_span_idx)); + return (false, PRIORITY_AUTO_KEEP, "", false, Some(root_span_idx)); } } From 43a889d56e5272da3145de448b49a565328cb1b5 Mon Sep 17 00:00:00 2001 From:
andrewqian2001datadog Date: Fri, 23 Jan 2026 10:15:07 -0500 Subject: [PATCH 19/53] revert change --- lib/saluki-components/src/transforms/trace_sampler/mod.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/saluki-components/src/transforms/trace_sampler/mod.rs b/lib/saluki-components/src/transforms/trace_sampler/mod.rs index 588844d648..9582ffc4bc 100644 --- a/lib/saluki-components/src/transforms/trace_sampler/mod.rs +++ b/lib/saluki-components/src/transforms/trace_sampler/mod.rs @@ -371,7 +371,7 @@ impl TraceSampler { if user_priority > 0 { // User wants to keep this trace return ( - false, + true, user_priority, DECISION_MAKER_MANUAL_PRIORITY, false, @@ -386,7 +386,7 @@ impl TraceSampler { _sampler_name = SamplerName::Error; let keep = self.error_sampler.sample_error(now, trace, root_span_idx); if keep { - return (false, PRIORITY_AUTO_KEEP, "", false, Some(root_span_idx)); + return (true, PRIORITY_AUTO_KEEP, "", false, Some(root_span_idx)); } } From cd1d2164c8e9bb6d9545c786bf5fc3d57890db9b Mon Sep 17 00:00:00 2001 From: andrewqian2001datadog Date: Fri, 23 Jan 2026 10:40:45 -0500 Subject: [PATCH 20/53] use apm config --- bin/agent-data-plane/src/cli/run.rs | 3 +- .../src/common/datadog/apm.rs | 109 ++++++++++++++++++ .../src/transforms/trace_sampler/mod.rs | 70 +++-------- 3 files changed, 127 insertions(+), 55 deletions(-) diff --git a/bin/agent-data-plane/src/cli/run.rs b/bin/agent-data-plane/src/cli/run.rs index 394c84413c..9c5be93019 100644 --- a/bin/agent-data-plane/src/cli/run.rs +++ b/bin/agent-data-plane/src/cli/run.rs @@ -328,7 +328,8 @@ async fn add_baseline_traces_pipeline_to_blueprint( .await?; let dd_traces_enrich_config = ChainedConfiguration::default().with_transform_builder("apm_onboarding", ApmOnboardingConfiguration); - let trace_sampler_config = TraceSamplerConfiguration::default(); + let trace_sampler_config = TraceSamplerConfiguration::from_configuration(config) + .error_context("Failed to configure Trace Sampler 
transform.")?; let apm_stats_transform_config = ApmStatsTransformConfiguration::from_configuration(config) .error_context("Failed to configure APM Stats transform.")? .with_environment_provider(env_provider.clone()) diff --git a/lib/saluki-components/src/common/datadog/apm.rs b/lib/saluki-components/src/common/datadog/apm.rs index e4099b70ba..c9e0a76874 100644 --- a/lib/saluki-components/src/common/datadog/apm.rs +++ b/lib/saluki-components/src/common/datadog/apm.rs @@ -10,6 +10,21 @@ const fn default_target_traces_per_second() -> f64 { const fn default_errors_per_second() -> f64 { 10.0 } +const fn default_sampling_percentage() -> f64 { + 100.0 +} + +const fn default_error_sampling_enabled() -> bool { + true +} + +const fn default_error_tracking_standalone_enabled() -> bool { + false +} + +const fn default_probabilistic_sampling_enabled() -> bool { + true +} const fn default_peer_tags_aggregation() -> bool { true } @@ -31,6 +46,56 @@ struct ApmConfiguration { apm_config: ApmConfig, } +#[derive(Clone, Debug, Deserialize)] +struct ProbabilisticSamplerConfig { + /// Enables probabilistic sampling. + /// + /// When enabled, the trace sampler keeps approximately `sampling_percentage` of traces using a + /// deterministic hash of the trace ID. + /// + /// Defaults to `true`. + #[serde(default = "default_probabilistic_sampling_enabled")] + enabled: bool, + + /// Sampling percentage (0-100). + /// + /// Determines the percentage of traces to keep. A value of 100 keeps all traces, + /// while 50 keeps approximately half. Values outside 0-100 are treated as 100. + /// + /// Defaults to 100.0 (keep all traces). 
+ #[serde(default = "default_sampling_percentage")] + sampling_percentage: f64, +} + +impl Default for ProbabilisticSamplerConfig { + fn default() -> Self { + Self { + enabled: default_probabilistic_sampling_enabled(), + sampling_percentage: default_sampling_percentage(), + } + } +} + +#[derive(Clone, Debug, Deserialize)] +struct ErrorTrackingStandaloneConfig { + /// Enables Error Tracking Standalone mode. + /// + /// When enabled, error tracking standalone mode suppresses single-span sampling and analytics + /// events for dropped traces. + /// + /// Defaults to `false`. + #[serde(default = "default_error_tracking_standalone_enabled")] + enabled: bool, +} + +impl Default for ErrorTrackingStandaloneConfig { + fn default() -> Self { + Self { + enabled: default_error_tracking_standalone_enabled(), + } + } +} + #[derive(Clone, Debug, Deserialize)] pub struct ApmConfig { /// Target traces per second for priority sampling. @@ -45,6 +110,27 @@ pub struct ApmConfig { #[serde(default = "default_errors_per_second")] errors_per_second: f64, + /// Probabilistic sampler configuration. + /// + /// Defaults to enabled with `sampling_percentage` set to 100.0 (keep all traces). + #[serde(default)] + probabilistic_sampler: ProbabilisticSamplerConfig, + + /// Enable error sampling in the trace sampler. + /// + /// When enabled, traces containing errors will be kept even if they would be dropped by + /// probabilistic sampling. This ensures error visibility at low sampling rates. + /// + /// Defaults to `true`. + #[serde(default = "default_error_sampling_enabled")] + error_sampling_enabled: bool, + + /// Error Tracking Standalone configuration. + /// + /// Defaults to disabled. + #[serde(default)] + error_tracking_standalone: ErrorTrackingStandaloneConfig, + /// Enables an additional stats computation check on spans to see if they have an eligible `span.kind` (server, consumer, client, producer). /// If enabled, a span with an eligible `span.kind` will have stats computed. 
If disabled, only top-level and measured spans will have stats computed. /// @@ -94,6 +180,26 @@ impl ApmConfig { self.errors_per_second } + /// Returns if probabilistic sampling is enabled. + pub const fn probabilistic_sampler_enabled(&self) -> bool { + self.probabilistic_sampler.enabled + } + + /// Returns the probabilistic sampler sampling percentage. + pub const fn probabilistic_sampler_sampling_percentage(&self) -> f64 { + self.probabilistic_sampler.sampling_percentage + } + + /// Returns if error sampling is enabled. + pub const fn error_sampling_enabled(&self) -> bool { + self.error_sampling_enabled + } + + /// Returns if error tracking standalone mode is enabled. + pub const fn error_tracking_standalone_enabled(&self) -> bool { + self.error_tracking_standalone.enabled + } + /// Returns if stats computation by span kind is enabled. pub const fn compute_stats_by_span_kind(&self) -> bool { self.compute_stats_by_span_kind @@ -132,6 +238,9 @@ impl Default for ApmConfig { Self { target_traces_per_second: default_target_traces_per_second(), errors_per_second: default_errors_per_second(), + probabilistic_sampler: ProbabilisticSamplerConfig::default(), + error_sampling_enabled: default_error_sampling_enabled(), + error_tracking_standalone: ErrorTrackingStandaloneConfig::default(), compute_stats_by_span_kind: default_compute_stats_by_span_kind(), peer_tags_aggregation: default_peer_tags_aggregation(), peer_tags: Vec::new(), diff --git a/lib/saluki-components/src/transforms/trace_sampler/mod.rs b/lib/saluki-components/src/transforms/trace_sampler/mod.rs index 9582ffc4bc..04d32886d2 100644 --- a/lib/saluki-components/src/transforms/trace_sampler/mod.rs +++ b/lib/saluki-components/src/transforms/trace_sampler/mod.rs @@ -9,6 +9,7 @@ use async_trait::async_trait; use memory_accounting::{MemoryBounds, MemoryBoundsBuilder}; use saluki_common::collections::FastHashMap; +use saluki_config::GenericConfiguration; use saluki_core::{ components::{transforms::*, ComponentContext}, 
data_model::event::{ @@ -18,7 +19,6 @@ use saluki_core::{ topology::OutputDefinition, }; use saluki_error::GenericError; -use serde::Deserialize; use stringtheory::MetaString; use tokio::select; use tracing::debug; @@ -29,6 +29,7 @@ mod probabilistic; mod score_sampler; mod signature; +use crate::common::datadog::apm::ApmConfig; use self::probabilistic::PROB_RATE_KEY; // Sampling priority constants (matching datadog-agent) @@ -63,63 +64,24 @@ enum SamplerName { // TODO: add NoPriority,Priority,Rare } -fn default_sampling_percentage() -> f64 { - 100.0 -} - -fn default_error_sampling_enabled() -> bool { - true -} - -fn default_error_tracking_standalone() -> bool { - false -} - -fn default_probabilistic_sampling_enabled() -> bool { - true -} - /// Configuration for the trace sampler transform. -#[derive(Debug, Deserialize)] +#[derive(Debug)] pub struct TraceSamplerConfiguration { - /// Sampling percentage (0-100). - /// - /// Determines the percentage of traces to keep. A value of 100 keeps all traces, - /// while 50 keeps approximately half. Values outside 0-100 are treated as 100. - /// - /// Defaults to 100.0 (keep all traces). - #[serde(default = "default_sampling_percentage")] - sampling_percentage: f64, - - /// Enable error sampling. - /// - /// When enabled, traces containing errors will be kept even if they would be - /// dropped by probabilistic sampling. This ensures error visibility at low sampling rates. - /// - /// Defaults to `true`. - #[serde(default = "default_error_sampling_enabled")] - error_sampling_enabled: bool, - - /// TODO: implement full functionality, this is just used in an if statement currently. https://github.com/DataDog/datadog-agent/blob/main/pkg/trace/agent/agent.go#L1073-L1080 - #[serde(default = "default_error_tracking_standalone")] - error_tracking_standalone: bool, + apm_config: ApmConfig, +} - /// Enable probabilistic sampling. - /// - /// When enabled, traces will be sampled probabilistically based on the sampling percentage. 
- /// - /// Defaults to `true`. - #[serde(default = "default_probabilistic_sampling_enabled")] - probabilistic_sampling_enabled: bool, +impl TraceSamplerConfiguration { + /// Creates a new `TraceSamplerConfiguration` from the given configuration. + pub fn from_configuration(config: &GenericConfiguration) -> Result { + let apm_config = ApmConfig::from_configuration(config)?; + Ok(Self { apm_config }) + } } impl Default for TraceSamplerConfiguration { fn default() -> Self { Self { - sampling_percentage: default_sampling_percentage(), - error_sampling_enabled: default_error_sampling_enabled(), - error_tracking_standalone: default_error_tracking_standalone(), - probabilistic_sampling_enabled: default_probabilistic_sampling_enabled(), + apm_config: ApmConfig::default(), } } } @@ -137,10 +99,10 @@ impl TransformBuilder for TraceSamplerConfiguration { async fn build(&self, _context: ComponentContext) -> Result, GenericError> { let sampler = TraceSampler { - sampling_rate: self.sampling_percentage / 100.0, - error_sampling_enabled: self.error_sampling_enabled, - error_tracking_standalone: self.error_tracking_standalone, - probabilistic_sampler_enabled: self.probabilistic_sampling_enabled, + sampling_rate: self.apm_config.probabilistic_sampler_sampling_percentage() / 100.0, + error_sampling_enabled: self.apm_config.error_sampling_enabled(), + error_tracking_standalone: self.apm_config.error_tracking_standalone_enabled(), + probabilistic_sampler_enabled: self.apm_config.probabilistic_sampler_enabled(), error_sampler: errors::ErrorsSampler::new(ERROR_TPS, ERROR_SAMPLE_RATE), }; From 67c29855ba7f3e00c16df56b02f5ac804d10aa77 Mon Sep 17 00:00:00 2001 From: andrewqian2001datadog Date: Fri, 23 Jan 2026 10:43:20 -0500 Subject: [PATCH 21/53] make check-clippy --- .../src/transforms/trace_sampler/mod.rs | 10 +--------- 1 file changed, 1 insertion(+), 9 deletions(-) diff --git a/lib/saluki-components/src/transforms/trace_sampler/mod.rs 
b/lib/saluki-components/src/transforms/trace_sampler/mod.rs index 04d32886d2..9fcb662198 100644 --- a/lib/saluki-components/src/transforms/trace_sampler/mod.rs +++ b/lib/saluki-components/src/transforms/trace_sampler/mod.rs @@ -29,8 +29,8 @@ mod probabilistic; mod score_sampler; mod signature; -use crate::common::datadog::apm::ApmConfig; use self::probabilistic::PROB_RATE_KEY; +use crate::common::datadog::apm::ApmConfig; // Sampling priority constants (matching datadog-agent) #[allow(dead_code)] @@ -78,14 +78,6 @@ impl TraceSamplerConfiguration { } } -impl Default for TraceSamplerConfiguration { - fn default() -> Self { - Self { - apm_config: ApmConfig::default(), - } - } -} - #[async_trait] impl TransformBuilder for TraceSamplerConfiguration { fn input_event_type(&self) -> EventType { From d17c04f8d973d7fbf37d18c7967a2f96910632aa Mon Sep 17 00:00:00 2001 From: andrewqian2001datadog Date: Fri, 23 Jan 2026 14:00:06 -0500 Subject: [PATCH 22/53] address comments --- .../transforms/trace_sampler/core_sampler.rs | 28 +++------------- .../src/transforms/trace_sampler/errors.rs | 20 +----------- .../src/transforms/trace_sampler/mod.rs | 32 +++++-------------- .../transforms/trace_sampler/probabilistic.rs | 14 +++----- .../transforms/trace_sampler/score_sampler.rs | 28 ++++------------ .../src/transforms/trace_sampler/signature.rs | 9 ++---- 6 files changed, 27 insertions(+), 104 deletions(-) diff --git a/lib/saluki-components/src/transforms/trace_sampler/core_sampler.rs b/lib/saluki-components/src/transforms/trace_sampler/core_sampler.rs index f23fd6b2f5..3b268bc83f 100644 --- a/lib/saluki-components/src/transforms/trace_sampler/core_sampler.rs +++ b/lib/saluki-components/src/transforms/trace_sampler/core_sampler.rs @@ -1,5 +1,3 @@ -#![allow(dead_code)] - use std::time::{Duration, SystemTime, UNIX_EPOCH}; use saluki_common::collections::FastHashMap; @@ -19,15 +17,19 @@ pub struct Sampler { /// allSigSeen counts all signatures in a circular buffer of NUM_BUCKETS of 
BUCKET_DURATION all_sigs_seen: [f32; NUM_BUCKETS], + /// lastBucketID is the index of the last bucket on which traces were counted last_bucket_id: u64, + /// rates maps sampling rate in % rates: FastHashMap, + /// lowestRate is the lowest rate of all signatures lowest_rate: f64, - // TODO: add comments for the source code, etc. + /// Maximum limit to the total number of traces per second to sample target_tps: f64, + /// extraRate is an extra raw sampling rate to apply on top of the sampler rate extra_rate: f64, } @@ -230,26 +232,6 @@ impl Sampler { rate } - /// Updates the target TPS and adjusts all existing rates proportionally. - pub fn update_target_tps(&mut self, new_target_tps: f64) { - let previous_target_tps = self.target_tps; - self.target_tps = new_target_tps; - - if previous_target_tps == 0.0 { - return; - } - - let ratio = new_target_tps / previous_target_tps; - for rate in self.rates.values_mut() { - *rate = (*rate * ratio).min(1.0); - } - } - - /// Get the current target TPS. - pub fn get_target_tps(&self) -> f64 { - self.target_tps - } - /// Returns the number of signatures being tracked. pub fn size(&self) -> i64 { self.seen.len() as i64 diff --git a/lib/saluki-components/src/transforms/trace_sampler/errors.rs b/lib/saluki-components/src/transforms/trace_sampler/errors.rs index b25af304a7..5518c289d3 100644 --- a/lib/saluki-components/src/transforms/trace_sampler/errors.rs +++ b/lib/saluki-components/src/transforms/trace_sampler/errors.rs @@ -3,11 +3,9 @@ //! The error sampler catches traces containing spans with errors, ensuring //! error visibility even at low sampling rates. 
-#![allow(dead_code)] - use std::time::SystemTime; -use saluki_core::data_model::event::trace::{Span, Trace}; +use saluki_core::data_model::event::trace::Trace; use super::score_sampler::{ScoreSampler, ERRORS_RATE_KEY}; @@ -29,26 +27,10 @@ impl ErrorsSampler { } } - /// /// This method should be called when a trace contains errors and needs to be /// evaluated by the error sampler. pub(super) fn sample_error(&mut self, now: SystemTime, trace: &mut Trace, root_idx: usize) -> bool { // Use the score sampler to make the sampling decision self.score_sampler.sample(now, trace, root_idx) } - - /// Set the error sampling rate metric on a span. - pub(super) fn set_sampling_rate_metric(&self, span: &mut Span, rate: f64) { - self.score_sampler.set_sampling_rate_metric(span, rate); - } - - /// Get the target TPS for error sampling. - pub(super) fn get_target_tps(&self) -> f64 { - self.score_sampler.get_target_tps() - } - - /// Update the target TPS for error sampling. - pub(super) fn update_target_tps(&mut self, target_tps: f64) { - self.score_sampler.update_target_tps(target_tps); - } } diff --git a/lib/saluki-components/src/transforms/trace_sampler/mod.rs b/lib/saluki-components/src/transforms/trace_sampler/mod.rs index 9fcb662198..674f4b97a1 100644 --- a/lib/saluki-components/src/transforms/trace_sampler/mod.rs +++ b/lib/saluki-components/src/transforms/trace_sampler/mod.rs @@ -5,6 +5,12 @@ //! - User-set priority preservation //! - Error-based sampling as a safety net //! - OTLP trace ingestion with proper sampling decision handling +//! +//! # Missing +//! +//! add trace metrics: datadog-agent/pkg/trace/sampler/metrics.go +//! adding missing samplers (priority, nopriority, rare) +//! 
add error tracking standalone mode

use async_trait::async_trait;
use memory_accounting::{MemoryBounds, MemoryBoundsBuilder};
@@ -57,13 +63,6 @@ const DECISION_MAKER_PROBABILISTIC: &str = "-9";
#[allow(dead_code)]
const DECISION_MAKER_MANUAL_PRIORITY: &str = "-4";

-enum SamplerName {
-    Probabilistic,
-    Error,
-    Unknown,
-    // TODO: add NoPriority,Priority,Rare
-}
-
/// Configuration for the trace sampler transform.
#[derive(Debug)]
pub struct TraceSamplerConfiguration {
@@ -213,7 +212,7 @@ impl TraceSampler {
        false
    }

-    /// Check if trace contains spans with Single Span Sampling tags
+    /// Returns all spans from the given trace that have Single Span Sampling tags present.
    fn get_single_span_sampled_spans(&self, trace: &Trace) -> Vec {
        let mut sampled_spans = Vec::new();
        for span in trace.spans().iter() {
@@ -224,7 +223,7 @@ impl TraceSampler {
        sampled_spans
    }

-    /// Get spans marked as analyzed (analytics events)
+    /// Returns all spans from the given trace that are marked as analyzed (analytics events).
fn get_analyzed_spans(&self, trace: &Trace) -> Vec { let mut analyzed_spans = Vec::new(); for span in trace.spans().iter() { @@ -279,21 +278,12 @@ impl TraceSampler { return (false, PRIORITY_AUTO_DROP, "", false, None); }; - let mut _sampler_name = SamplerName::Unknown; // TODO: add trace metrics: datadog-agent/pkg/trace/sampler/metrics.go - - // TODO: Error Tracking Standalone mode (ETS) - - // TODO: Run RareSampler early to count signatures - // Modern path: ProbabilisticSamplerEnabled = true if self.probabilistic_sampler_enabled { - _sampler_name = SamplerName::Probabilistic; let mut prob_keep = false; let mut decision_maker = ""; let mut should_add_prob_rate = false; - // TODO: Check if rare sampler kept it - // Run probabilistic sampler - use root span's trace ID let root_trace_id = trace.spans()[root_span_idx].trace_id(); @@ -302,7 +292,6 @@ impl TraceSampler { should_add_prob_rate = true; prob_keep = true; } else if self.error_sampling_enabled && contains_error { - _sampler_name = SamplerName::Error; prob_keep = self.error_sampler.sample_error(now, trace, root_span_idx); } @@ -334,10 +323,7 @@ impl TraceSampler { } } - // TODO: add missing samplers (priority, no_priority) - if self.error_sampling_enabled && self.trace_contains_error(trace, false) { - _sampler_name = SamplerName::Error; let keep = self.error_sampler.sample_error(now, trace, root_span_idx); if keep { return (true, PRIORITY_AUTO_KEEP, "", false, Some(root_span_idx)); @@ -390,8 +376,6 @@ impl TraceSampler { #[async_trait] impl Transform for TraceSampler { - // run takes `self: Box`, and not &self, so it consumes the `TraceSampler` instance, after run starts there is a single owner of the sampler for the lifetime of the task. This means - // that no internal locking is necessary unlike the agent code referenced. 
async fn run(mut self: Box, mut context: TransformContext) -> Result<(), GenericError> { let mut health = context.take_health_handle(); health.mark_ready(); diff --git a/lib/saluki-components/src/transforms/trace_sampler/probabilistic.rs b/lib/saluki-components/src/transforms/trace_sampler/probabilistic.rs index 8bbae09297..785823e5c3 100644 --- a/lib/saluki-components/src/transforms/trace_sampler/probabilistic.rs +++ b/lib/saluki-components/src/transforms/trace_sampler/probabilistic.rs @@ -2,13 +2,6 @@ use super::signature::fnv1a_32; -// Knuth multiplicative hashing factor for deterministic sampling. -// -// This constant is shared across datadog-agent, Datadog libraries, and OpenTelemetry. -// It is currently unused, but kept to mirror the upstream implementation for future work. -#[allow(dead_code)] -const KNUTH_FACTOR: u64 = 1111111111111111111; - // Probabilistic sampler constants (matching datadog-agent's bucketed sampler). // These constants exist to match the behavior of the OTEL probabilistic sampler. // See: https://github.com/open-telemetry/opentelemetry-collector-contrib/.../probabilisticsamplerprocessor/tracesprocessor.go#L38-L42 @@ -19,6 +12,11 @@ const BITMASK_HASH_BUCKETS: u32 = NUM_PROBABILISTIC_BUCKETS - 1; pub(super) const PROB_RATE_KEY: &str = "_dd.prob_sr"; /// Probabilistic sampler. +/// +/// #Missing +/// +/// - Full trace ID mode (off by default). +/// - Make hash seed configurable pub(super) struct ProbabilisticSampler; impl ProbabilisticSampler { @@ -28,7 +26,6 @@ impl ProbabilisticSampler { pub(super) fn sample(trace_id: u64, sampling_rate: f64) -> bool { // logic taken from here: https://github.com/DataDog/datadog-agent/blob/main/pkg/trace/sampler/probabilistic.go#L62 // we take in a trace id (randomly generated) and hash/mask it to get a number between 0 and 0x3FFF and compare it to the sampling rate. 
- // TODO: add full trace id mode (off by default) // to match the agent behaviour, we need to make the array 16 bytes long, this is used for full trace id mode // but we require it now to match the hash. @@ -37,7 +34,6 @@ impl ProbabilisticSampler { // Match the datadog-agent bucketed probabilistic sampler behavior. // (Fixed zero hash seed; equivalent to the agent's default when unset.) - // TODO: make the hash seed configurable let hash_seed = [0u8; 4]; let hash = fnv1a_32(&hash_seed, &tid); let scaled_sampling_percentage = (sampling_rate * NUM_PROBABILISTIC_BUCKETS as f64) as u32; diff --git a/lib/saluki-components/src/transforms/trace_sampler/score_sampler.rs b/lib/saluki-components/src/transforms/trace_sampler/score_sampler.rs index 70519fd8ac..9eb918aba2 100644 --- a/lib/saluki-components/src/transforms/trace_sampler/score_sampler.rs +++ b/lib/saluki-components/src/transforms/trace_sampler/score_sampler.rs @@ -13,8 +13,6 @@ const KEY_SAMPLING_RATE_PRE_SAMPLER: &str = "_dd1.sr.rapre"; // ScoreSampler-specific rate keys pub(super) const ERRORS_RATE_KEY: &str = "_dd.errors_sr"; -#[allow(dead_code)] -const NO_PRIORITY_RATE_KEY: &str = "_dd.no_p_sr"; // shrinkCardinality is the max Signature cardinality before shrinking const SHRINK_CARDINALITY: usize = 200; @@ -31,6 +29,11 @@ const SAMPLER_HASHER: u64 = 1111111111111111111; /// scoring it and applying a rate. /// The rates are applied on the TraceID to maximize the number of chunks with errors caught for the same traceID. /// For a set traceID: P(chunk1 kept and chunk2 kept) = min(P(chunk1 kept), P(chunk2 kept)) +/// +/// # Missing +/// +/// TODO: Add SampleV1 method for legacy trace format support +/// TODO: Add NoPrioritySampler implementation pub struct ScoreSampler { sampler: Sampler, sampling_rate_key: &'static str, @@ -50,9 +53,7 @@ impl ScoreSampler { } } - /// Sample a trace chunk when you only have a root span index. 
- /// - /// This avoids borrowing the full trace while holding a mutable root span reference. + /// Sample counts an incoming trace and tells if it is a sample which has to be kept pub fn sample(&mut self, now: SystemTime, trace: &mut Trace, root_idx: usize) -> bool { // logic taken from here: https://github.com/DataDog/datadog-agent/blob/main/pkg/trace/sampler/scoresampler.go#L71 if self.disabled { @@ -127,16 +128,6 @@ impl ScoreSampler { Signature(sig.0 % (SHRINK_CARDINALITY as u64 / 2)) } - /// Get the target TPS for this sampler. - pub fn get_target_tps(&self) -> f64 { - self.sampler.get_target_tps() - } - - /// Update the target TPS for this sampler. - pub fn update_target_tps(&mut self, target_tps: f64) { - self.sampler.update_target_tps(target_tps); - } - /// Set the sampling rate metric on a span. pub fn set_sampling_rate_metric(&self, span: &mut Span, rate: f64) { span.metrics_mut() @@ -178,10 +169,3 @@ fn sample_by_rate(trace_id: u64, rate: f64) -> bool { true } } - -// TODO: Add SampleV1 method for legacy trace format support -// This would handle the V1 trace format used for backwards compatibility - -// TODO: Add NoPrioritySampler implementation -// NoPrioritySampler is dedicated to catching traces with no priority set. -// It would wrap ScoreSampler similar to ErrorsSampler diff --git a/lib/saluki-components/src/transforms/trace_sampler/signature.rs b/lib/saluki-components/src/transforms/trace_sampler/signature.rs index 47e9327c68..7e57780503 100644 --- a/lib/saluki-components/src/transforms/trace_sampler/signature.rs +++ b/lib/saluki-components/src/transforms/trace_sampler/signature.rs @@ -4,13 +4,13 @@ //! - a small FNV-1a 32-bit helper (used by probabilistic sampling) //! 
- a signature newtype + compute helper (for score/TPS samplers) -#![allow(dead_code)] - use saluki_core::data_model::event::trace::{Span, Trace}; use stringtheory::MetaString; const OFFSET_32: u32 = 2166136261; const PRIME_32: u32 = 16777619; +const KEY_HTTP_STATUS_CODE: &str = "http.status_code"; +const KEY_ERROR_TYPE: &str = "error.type"; fn write_hash(mut hash: u32, bytes: &[u8]) -> u32 { for &b in bytes { @@ -21,18 +21,13 @@ fn write_hash(mut hash: u32, bytes: &[u8]) -> u32 { } pub(super) fn fnv1a_32(seed: &[u8], bytes: &[u8]) -> u32 { - const OFFSET_32: u32 = 0x811c_9dc5; let hash = write_hash(OFFSET_32, seed); write_hash(hash, bytes) } -#[allow(dead_code)] #[derive(Copy, Clone, Debug, Eq, PartialEq, Hash)] pub(super) struct Signature(pub(super) u64); -const KEY_HTTP_STATUS_CODE: &str = "http.status_code"; -const KEY_ERROR_TYPE: &str = "error.type"; - fn get_trace_env(trace: &Trace, root_span_idx: usize) -> Option<&MetaString> { // logic taken from here: https://github.com/DataDog/datadog-agent/blob/main/pkg/trace/traceutil/trace.go#L19-L20 let env = trace.spans().get(root_span_idx).and_then(|span| span.meta().get("env")); From 633b43a59fe8ae3a4d81429a36c35d2bcac8df44 Mon Sep 17 00:00:00 2001 From: andrewqian2001datadog Date: Fri, 23 Jan 2026 14:28:42 -0500 Subject: [PATCH 23/53] fix missing doc, fix bugs --- .../transforms/trace_sampler/core_sampler.rs | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/lib/saluki-components/src/transforms/trace_sampler/core_sampler.rs b/lib/saluki-components/src/transforms/trace_sampler/core_sampler.rs index 3b268bc83f..1c8c1db5b4 100644 --- a/lib/saluki-components/src/transforms/trace_sampler/core_sampler.rs +++ b/lib/saluki-components/src/transforms/trace_sampler/core_sampler.rs @@ -14,22 +14,22 @@ pub struct Sampler { /// maps each Signature to a circular buffer of per-bucket (bucket_id) counts covering the last NUM_BUCKETS * BUCKET_DURATION window. 
seen: FastHashMap, - /// allSigSeen counts all signatures in a circular buffer of NUM_BUCKETS of BUCKET_DURATION + /// all_sigs_seen counts all signatures in a circular buffer of NUM_BUCKETS of BUCKET_DURATION all_sigs_seen: [f32; NUM_BUCKETS], - /// lastBucketID is the index of the last bucket on which traces were counted + /// last_bucket_id is the index of the last bucket on which traces were counted last_bucket_id: u64, /// rates maps sampling rate in % rates: FastHashMap, - /// lowestRate is the lowest rate of all signatures + /// lowest_rate is the lowest rate of all signatures lowest_rate: f64, /// Maximum limit to the total number of traces per second to sample target_tps: f64, - /// extraRate is an extra raw sampling rate to apply on top of the sampler rate + /// extra_rate is an extra raw sampling rate to apply on top of the sampler rate extra_rate: f64, } @@ -38,8 +38,8 @@ pub struct Sampler { fn zero_and_get_max(buckets: &mut [f32; NUM_BUCKETS], previous_bucket: u64, new_bucket: u64) -> f32 { // A bucket is a BUCKET_DURATION slice (5s) that stores the count of traces that fell in the interval. 
// An intuitive understanding of the function is that we start just after previous_buckets and iterate for a full window of buckets (NUM_BUCKETS)
-    // and zero out any buckets older then new_buckets (expired), then we compute the max_count amoung the buckets that are in the current window
-    let mut max_bucket = 0 as f32;
+    // and zero out any buckets older than new_buckets (expired), then we compute the max_count among the buckets that are in the current window
+    let mut max_bucket = 0.0;
    for i in (previous_bucket + 1)..=previous_bucket + NUM_BUCKETS as u64 {
        let index = i as usize % NUM_BUCKETS;
        // if a complete rotation (time between previous_bucket and new_bucket is more then NUM_BUCKETS * BUCKET_DURATION) happened between previous_bucket and new_bucket
@@ -76,6 +76,10 @@ fn compute_tps_per_sig(target_tps: f64, seen_tps: &[f64]) -> f64 {
    // 3) Next is last element, break.
    // Return sig_target = 15.
    // Interpretation: the low‑volume signatures "use up" 5 and 10 TPS, and the remaining budget (15) is the per‑signature target for the higher‑volume signature(s).
+
+    if seen_tps.len() == 0 {
+        return 0.0;
+    }
    let mut sorted: Vec = seen_tps.to_vec();
    sorted.sort_by(|a, b| a.total_cmp(b));
    // compute the initial per_signature TPS budget by splitting target_tps across all signatures.
@@ -105,6 +109,7 @@ impl Sampler { // All traces within the same `BUCKET_DURATION` interval share the same bucket_id let bucket_id = now.duration_since(UNIX_EPOCH).unwrap_or_default().as_secs() / BUCKET_DURATION.as_secs(); let prev_bucket_id = self.last_bucket_id; + self.last_bucket_id = bucket_id; // If the bucket_id changed then the sliding window advanced and we need to recompute rates let update_rate = prev_bucket_id != bucket_id; if update_rate { From 9c20678aafb943ce9497b6b02cfaed1dd944d27e Mon Sep 17 00:00:00 2001 From: andrewqian2001datadog Date: Fri, 23 Jan 2026 14:38:00 -0500 Subject: [PATCH 24/53] address comments part two --- .../transforms/trace_sampler/core_sampler.rs | 2 +- .../src/transforms/trace_sampler/mod.rs | 90 ++++++++----------- 2 files changed, 39 insertions(+), 53 deletions(-) diff --git a/lib/saluki-components/src/transforms/trace_sampler/core_sampler.rs b/lib/saluki-components/src/transforms/trace_sampler/core_sampler.rs index 1c8c1db5b4..e17742c0aa 100644 --- a/lib/saluki-components/src/transforms/trace_sampler/core_sampler.rs +++ b/lib/saluki-components/src/transforms/trace_sampler/core_sampler.rs @@ -77,7 +77,7 @@ fn compute_tps_per_sig(target_tps: f64, seen_tps: &[f64]) -> f64 { // Return sig_target = 15. // Interpretation: the low‑volume signatures "use up" 5 and 10 TPS, and the remaining budget (15) is the per‑signature target for the higher‑volume signature(s). 
- if seen_tps.len() == 0 { + if seen_tps.is_empty() { return 0.0; } let mut sorted: Vec = seen_tps.to_vec(); diff --git a/lib/saluki-components/src/transforms/trace_sampler/mod.rs b/lib/saluki-components/src/transforms/trace_sampler/mod.rs index 674f4b97a1..cce6f741e9 100644 --- a/lib/saluki-components/src/transforms/trace_sampler/mod.rs +++ b/lib/saluki-components/src/transforms/trace_sampler/mod.rs @@ -264,33 +264,37 @@ impl TraceSampler { } } - /// Main sampling pipeline - mirrors datadog-agent's runSamplers flow - /// Returns (keep_decision, priority, decision_maker_tag, should_add_prob_rate, root_span_index) - fn run_samplers(&mut self, trace: &mut Trace) -> (bool, i32, &'static str, bool, Option) { + /// Evaluates the given trace against all configured samplers. + /// + /// Return a tuple containing whether or not the trace should be kept, the decision maker tag (which sampler is responsible), + /// and the index of the root span used for evaluation. + fn run_samplers(&mut self, trace: &mut Trace) -> (bool, i32, &'static str, Option) { // logic taken from: https://github.com/DataDog/datadog-agent/blob/main/pkg/trace/agent/agent.go#L1066 let now = std::time::SystemTime::now(); // Empty trace check if trace.spans().is_empty() { - return (false, PRIORITY_AUTO_DROP, "", false, None); + return (false, PRIORITY_AUTO_DROP, "", None); } let contains_error = self.trace_contains_error(trace, false); let Some(root_span_idx) = self.get_root_span_index(trace) else { - return (false, PRIORITY_AUTO_DROP, "", false, None); + return (false, PRIORITY_AUTO_DROP, "", None); }; // Modern path: ProbabilisticSamplerEnabled = true if self.probabilistic_sampler_enabled { let mut prob_keep = false; let mut decision_maker = ""; - let mut should_add_prob_rate = false; // Run probabilistic sampler - use root span's trace ID - let root_trace_id = trace.spans()[root_span_idx].trace_id(); if self.sample_probabilistic(root_trace_id) { decision_maker = DECISION_MAKER_PROBABILISTIC; // 
probabilistic sampling - should_add_prob_rate = true; prob_keep = true; + + if let Some(root_span) = trace.spans_mut().get_mut(root_span_idx) { + let metrics = root_span.metrics_mut(); + metrics.insert(MetaString::from(PROB_RATE_KEY), self.sampling_rate); + } } else if self.error_sampling_enabled && contains_error { prob_keep = self.error_sampler.sample_error(now, trace, root_span_idx); } @@ -301,37 +305,25 @@ impl TraceSampler { PRIORITY_AUTO_DROP }; - return ( - prob_keep, - priority, - decision_maker, - should_add_prob_rate, - Some(root_span_idx), - ); + return (prob_keep, priority, decision_maker, Some(root_span_idx)); } if let Some(user_priority) = self.get_user_priority(trace, root_span_idx) { if user_priority > 0 { // User wants to keep this trace - return ( - true, - user_priority, - DECISION_MAKER_MANUAL_PRIORITY, - false, - Some(root_span_idx), - ); + return (true, user_priority, DECISION_MAKER_MANUAL_PRIORITY, Some(root_span_idx)); } } if self.error_sampling_enabled && self.trace_contains_error(trace, false) { let keep = self.error_sampler.sample_error(now, trace, root_span_idx); if keep { - return (true, PRIORITY_AUTO_KEEP, "", false, Some(root_span_idx)); + return (true, PRIORITY_AUTO_KEEP, "", Some(root_span_idx)); } } // Default: drop the trace - (false, PRIORITY_AUTO_DROP, "", false, Some(root_span_idx)) + (false, PRIORITY_AUTO_DROP, "", Some(root_span_idx)) } /// Apply sampling metadata to the trace in-place. @@ -339,20 +331,13 @@ impl TraceSampler { /// The `root_span_id` parameter identifies which span should receive the sampling metadata. /// This avoids recalculating the root span since it was already found in `run_samplers`. 
fn apply_sampling_metadata( - &self, trace: &mut Trace, keep: bool, priority: i32, decision_maker: &str, add_prob_rate: bool, - root_span_idx: usize, + &self, trace: &mut Trace, keep: bool, priority: i32, decision_maker: &str, root_span_idx: usize, ) { let root_span_value = match trace.spans_mut().get_mut(root_span_idx) { Some(span) => span, None => return, }; - // Add the probabilistic sampling rate if requested - if add_prob_rate { - let metrics = root_span_value.metrics_mut(); - metrics.insert(MetaString::from(PROB_RATE_KEY), self.sampling_rate); - } - // Add tag for the decision maker let meta = root_span_value.meta_mut(); if priority > 0 && !decision_maker.is_empty() { @@ -393,10 +378,8 @@ impl Transform for TraceSampler { // keep is a boolean that indicates if the trace should be kept or dropped // priority is the sampling priority // decision_maker is the tag that indicates the decision maker (probabilistic, error, etc.) - // add_prob_rate is a boolean that indicates if the PROB_RATE_KEY should be added to the the root span // root_span_idx is the index of the root span of the trace - let (keep, priority, decision_maker, add_prob_rate, root_span_idx) = - self.run_samplers(&mut trace); + let (keep, priority, decision_maker, root_span_idx) = self.run_samplers(&mut trace); if keep { if let Some(root_idx) = root_span_idx { self.apply_sampling_metadata( @@ -404,7 +387,6 @@ impl Transform for TraceSampler { keep, priority, decision_maker, - add_prob_rate, root_idx, ); } @@ -629,7 +611,7 @@ mod tests { let mut trace = create_test_trace(vec![span]); trace.set_sampling(Some(TraceSampling::new(false, Some(PRIORITY_USER_KEEP), None, None))); - let (keep, priority, decision_maker, _, _) = sampler.run_samplers(&mut trace); + let (keep, priority, decision_maker, _) = sampler.run_samplers(&mut trace); assert!(keep); assert_eq!(priority, PRIORITY_USER_KEEP); assert_eq!(decision_maker, DECISION_MAKER_MANUAL_PRIORITY); @@ -639,7 +621,7 @@ mod tests { let mut trace = 
create_test_trace(vec![span]); trace.set_sampling(Some(TraceSampling::new(false, Some(PRIORITY_USER_DROP), None, None))); - let (keep, priority, _, _, _) = sampler.run_samplers(&mut trace); + let (keep, priority, _, _) = sampler.run_samplers(&mut trace); assert!(!keep); // Should not keep when user drops assert_eq!(priority, PRIORITY_AUTO_DROP); // Fallthrough to auto-drop @@ -648,7 +630,7 @@ mod tests { let mut trace = create_test_trace(vec![span]); trace.set_sampling(Some(TraceSampling::new(false, Some(PRIORITY_AUTO_KEEP), None, None))); - let (keep, priority, decision_maker, _, _) = sampler.run_samplers(&mut trace); + let (keep, priority, decision_maker, _) = sampler.run_samplers(&mut trace); assert!(keep); assert_eq!(priority, PRIORITY_AUTO_KEEP); assert_eq!(decision_maker, DECISION_MAKER_MANUAL_PRIORITY); @@ -692,7 +674,7 @@ mod tests { let span_with_error = create_test_span(u64::MAX - 1, 1, 1); let mut trace = create_test_trace(vec![span_with_error]); - let (keep, priority, decision_maker, _, _) = sampler.run_samplers(&mut trace); + let (keep, priority, decision_maker, _) = sampler.run_samplers(&mut trace); assert!(keep); assert_eq!(priority, PRIORITY_AUTO_KEEP); assert_eq!(decision_maker, ""); // Error sampler doesn't set decision_maker @@ -706,7 +688,7 @@ mod tests { let span = create_test_span_with_metrics(12345, 1, metrics); let mut trace = create_test_trace(vec![span]); - let (keep, priority, decision_maker, _, _) = sampler.run_samplers(&mut trace); + let (keep, priority, decision_maker, _) = sampler.run_samplers(&mut trace); assert!(keep); assert_eq!(priority, 2); // UserKeep assert_eq!(decision_maker, DECISION_MAKER_MANUAL_PRIORITY); // manual decision @@ -717,7 +699,7 @@ mod tests { let mut sampler = create_test_sampler(); let mut trace = create_test_trace(vec![]); - let (keep, priority, _, _, _) = sampler.run_samplers(&mut trace); + let (keep, priority, _, _) = sampler.run_samplers(&mut trace); assert!(!keep); assert_eq!(priority, 
PRIORITY_AUTO_DROP); } @@ -865,32 +847,36 @@ mod tests { ); let mut trace = create_test_trace(vec![root_span]); - let (keep, priority, decision_maker, add_prob_rate, root_span_idx) = sampler.run_samplers(&mut trace); + let (keep, priority, decision_maker, root_span_idx) = sampler.run_samplers(&mut trace); if keep && decision_maker == DECISION_MAKER_PROBABILISTIC { - // If sampled probabilistically, check probRateKey should be added + // If sampled probabilistically, check that probRateKey was already added assert_eq!(priority, PRIORITY_AUTO_KEEP); assert_eq!(decision_maker, DECISION_MAKER_PROBABILISTIC); // probabilistic sampling marker - assert!(add_prob_rate); // Should add prob_rate_key - // Use root span index directly + // Check that the root span already has the probRateKey (it should have been added in run_samplers) let root_idx = root_span_idx.unwrap_or(0); + let root_span = &trace.spans()[root_idx]; + assert!(root_span.metrics().contains_key(PROB_RATE_KEY)); + assert_eq!(*root_span.metrics().get(PROB_RATE_KEY).unwrap(), 0.75); - // Test that metadata is applied correctly + // Test that apply_sampling_metadata still works correctly for other metadata let mut trace_with_metadata = trace.clone(); sampler.apply_sampling_metadata( &mut trace_with_metadata, keep, priority, decision_maker, - add_prob_rate, root_idx, ); - // Check that the root span has the probRateKey - let modified_root = &trace_with_metadata.spans()[0]; - assert!(modified_root.metrics().contains_key(PROB_RATE_KEY)); - assert_eq!(*modified_root.metrics().get(PROB_RATE_KEY).unwrap(), 0.75); + // Check that decision maker tag was added + let modified_root = &trace_with_metadata.spans()[root_idx]; + assert!(modified_root.meta().contains_key(TAG_DECISION_MAKER)); + assert_eq!( + modified_root.meta().get(TAG_DECISION_MAKER).unwrap(), + &MetaString::from(DECISION_MAKER_PROBABILISTIC) + ); } } } From 34f011463c522b844519babd8a49a2eb66d6c74e Mon Sep 17 00:00:00 2001 From: andrewqian2001datadog Date: 
Fri, 23 Jan 2026 14:45:33 -0500 Subject: [PATCH 25/53] remove dead code, move code only used in test --- lib/saluki-components/src/transforms/trace_sampler/mod.rs | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/lib/saluki-components/src/transforms/trace_sampler/mod.rs b/lib/saluki-components/src/transforms/trace_sampler/mod.rs index cce6f741e9..a8b25a4945 100644 --- a/lib/saluki-components/src/transforms/trace_sampler/mod.rs +++ b/lib/saluki-components/src/transforms/trace_sampler/mod.rs @@ -39,11 +39,8 @@ use self::probabilistic::PROB_RATE_KEY; use crate::common::datadog::apm::ApmConfig; // Sampling priority constants (matching datadog-agent) -#[allow(dead_code)] -const PRIORITY_USER_DROP: i32 = -1; const PRIORITY_AUTO_DROP: i32 = 0; const PRIORITY_AUTO_KEEP: i32 = 1; -#[allow(dead_code)] const PRIORITY_USER_KEEP: i32 = 2; // TODO: Make error_tps and extra_sample_rate configurable @@ -488,6 +485,7 @@ mod tests { use saluki_core::data_model::event::trace::{Span as DdSpan, Trace}; use super::*; + const PRIORITY_USER_DROP: i32 = -1; fn create_test_sampler() -> TraceSampler { TraceSampler { From 934411ac9f1c0da2c8ed8d97f35d2b5ac6cbbda9 Mon Sep 17 00:00:00 2001 From: andrewqian2001datadog Date: Fri, 23 Jan 2026 14:59:07 -0500 Subject: [PATCH 26/53] remove more dead code --- lib/saluki-components/src/transforms/trace_sampler/mod.rs | 1 - 1 file changed, 1 deletion(-) diff --git a/lib/saluki-components/src/transforms/trace_sampler/mod.rs b/lib/saluki-components/src/transforms/trace_sampler/mod.rs index a8b25a4945..d6236be41e 100644 --- a/lib/saluki-components/src/transforms/trace_sampler/mod.rs +++ b/lib/saluki-components/src/transforms/trace_sampler/mod.rs @@ -57,7 +57,6 @@ const KEY_ANALYZED_SPANS: &str = "_dd.analyzed"; // Decision maker values for `_dd.p.dm` (matching datadog-agent). 
const DECISION_MAKER_PROBABILISTIC: &str = "-9"; -#[allow(dead_code)] const DECISION_MAKER_MANUAL_PRIORITY: &str = "-4"; /// Configuration for the trace sampler transform. From fdc04fd60d83bd2b017c40cfd4070ed6ca800f45 Mon Sep 17 00:00:00 2001 From: andrewqian2001datadog Date: Fri, 23 Jan 2026 15:01:40 -0500 Subject: [PATCH 27/53] formatting --- .../src/transforms/trace_sampler/core_sampler.rs | 10 +++++----- .../src/transforms/trace_sampler/mod.rs | 14 ++++---------- 2 files changed, 9 insertions(+), 15 deletions(-) diff --git a/lib/saluki-components/src/transforms/trace_sampler/core_sampler.rs b/lib/saluki-components/src/transforms/trace_sampler/core_sampler.rs index e17742c0aa..26adbeb08c 100644 --- a/lib/saluki-components/src/transforms/trace_sampler/core_sampler.rs +++ b/lib/saluki-components/src/transforms/trace_sampler/core_sampler.rs @@ -17,19 +17,19 @@ pub struct Sampler { /// all_sigs_seen counts all signatures in a circular buffer of NUM_BUCKETS of BUCKET_DURATION all_sigs_seen: [f32; NUM_BUCKETS], - /// last_bucket_id is the index of the last bucket on which traces were counted + /// last_bucket_id is the index of the last bucket on which traces were counted last_bucket_id: u64, - /// rates maps sampling rate in % + /// rates maps sampling rate in % rates: FastHashMap, - /// lowest_rate is the lowest rate of all signatures + /// lowest_rate is the lowest rate of all signatures lowest_rate: f64, - /// Maximum limit to the total number of traces per second to sample + /// Maximum limit to the total number of traces per second to sample target_tps: f64, - /// extra_rate is an extra raw sampling rate to apply on top of the sampler rate + /// extra_rate is an extra raw sampling rate to apply on top of the sampler rate extra_rate: f64, } diff --git a/lib/saluki-components/src/transforms/trace_sampler/mod.rs b/lib/saluki-components/src/transforms/trace_sampler/mod.rs index d6236be41e..2e7ce44bcc 100644 --- 
a/lib/saluki-components/src/transforms/trace_sampler/mod.rs +++ b/lib/saluki-components/src/transforms/trace_sampler/mod.rs @@ -5,11 +5,11 @@ //! - User-set priority preservation //! - Error-based sampling as a safety net //! - OTLP trace ingestion with proper sampling decision handling -//! +//! //! # Missing -//! +//! //! add trace metrics: datadog-agent/pkg/trace/sampler/metrics.go -//! adding missing samplers (priority, nopriority, rare) +//! adding missing samplers (priority, nopriority, rare) //! add error tracking standalone mode use async_trait::async_trait; @@ -859,13 +859,7 @@ mod tests { // Test that apply_sampling_metadata still works correctly for other metadata let mut trace_with_metadata = trace.clone(); - sampler.apply_sampling_metadata( - &mut trace_with_metadata, - keep, - priority, - decision_maker, - root_idx, - ); + sampler.apply_sampling_metadata(&mut trace_with_metadata, keep, priority, decision_maker, root_idx); // Check that decision maker tag was added let modified_root = &trace_with_metadata.spans()[root_idx]; From 2a77b755caa0ef80a5b29ce45b798d1667177f4a Mon Sep 17 00:00:00 2001 From: andrewqian2001datadog Date: Fri, 23 Jan 2026 15:14:21 -0500 Subject: [PATCH 28/53] move missing functionality to docs of trace_sampler --- lib/saluki-components/src/transforms/trace_sampler/mod.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/saluki-components/src/transforms/trace_sampler/mod.rs b/lib/saluki-components/src/transforms/trace_sampler/mod.rs index 2e7ce44bcc..111679f6e5 100644 --- a/lib/saluki-components/src/transforms/trace_sampler/mod.rs +++ b/lib/saluki-components/src/transforms/trace_sampler/mod.rs @@ -11,6 +11,7 @@ //! add trace metrics: datadog-agent/pkg/trace/sampler/metrics.go //! adding missing samplers (priority, nopriority, rare) //! add error tracking standalone mode +//! 
Make error_tps and extra_sample_rate configurable use async_trait::async_trait; use memory_accounting::{MemoryBounds, MemoryBoundsBuilder}; @@ -43,7 +44,6 @@ const PRIORITY_AUTO_DROP: i32 = 0; const PRIORITY_AUTO_KEEP: i32 = 1; const PRIORITY_USER_KEEP: i32 = 2; -// TODO: Make error_tps and extra_sample_rate configurable const ERROR_TPS: f64 = 10.0; // Default error TPS target const ERROR_SAMPLE_RATE: f64 = 1.0; // Default extra sample rate From bce63b0e3b16eb70092bed9d9517b29f812d6531 Mon Sep 17 00:00:00 2001 From: andrewqian2001datadog Date: Fri, 23 Jan 2026 15:19:24 -0500 Subject: [PATCH 29/53] set probabilistic sampler to false by default --- lib/saluki-components/src/common/datadog/apm.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/saluki-components/src/common/datadog/apm.rs b/lib/saluki-components/src/common/datadog/apm.rs index c9e0a76874..337bd7247c 100644 --- a/lib/saluki-components/src/common/datadog/apm.rs +++ b/lib/saluki-components/src/common/datadog/apm.rs @@ -23,7 +23,7 @@ const fn default_error_tracking_standalone_enabled() -> bool { } const fn default_probabilistic_sampling_enabled() -> bool { - true + false } const fn default_peer_tags_aggregation() -> bool { true From 9f9449e307ef2f639b6b450b798400e512a29ee9 Mon Sep 17 00:00:00 2001 From: andrewqian2001datadog Date: Mon, 26 Jan 2026 11:15:45 -0500 Subject: [PATCH 30/53] make error_tps configurable --- lib/saluki-components/src/common/datadog/apm.rs | 2 +- .../src/transforms/trace_sampler/mod.rs | 12 ++++++++---- 2 files changed, 9 insertions(+), 5 deletions(-) diff --git a/lib/saluki-components/src/common/datadog/apm.rs b/lib/saluki-components/src/common/datadog/apm.rs index 337bd7247c..dd3c846e53 100644 --- a/lib/saluki-components/src/common/datadog/apm.rs +++ b/lib/saluki-components/src/common/datadog/apm.rs @@ -53,7 +53,7 @@ struct ProbabilisticSamplerConfig { /// When enabled, the trace sampler keeps approximately `sampling_percentage` of traces using a /// 
deterministic hash of the trace ID. /// - /// Defaults to `true`. + /// Defaults to `false`. #[serde(default = "default_probabilistic_sampling_enabled")] enabled: bool, diff --git a/lib/saluki-components/src/transforms/trace_sampler/mod.rs b/lib/saluki-components/src/transforms/trace_sampler/mod.rs index 111679f6e5..b73c2d5978 100644 --- a/lib/saluki-components/src/transforms/trace_sampler/mod.rs +++ b/lib/saluki-components/src/transforms/trace_sampler/mod.rs @@ -11,7 +11,6 @@ //! add trace metrics: datadog-agent/pkg/trace/sampler/metrics.go //! adding missing samplers (priority, nopriority, rare) //! add error tracking standalone mode -//! Make error_tps and extra_sample_rate configurable use async_trait::async_trait; use memory_accounting::{MemoryBounds, MemoryBoundsBuilder}; @@ -29,6 +28,7 @@ use saluki_error::GenericError; use stringtheory::MetaString; use tokio::select; use tracing::debug; +use tracing::info; mod core_sampler; mod errors; @@ -44,8 +44,7 @@ const PRIORITY_AUTO_DROP: i32 = 0; const PRIORITY_AUTO_KEEP: i32 = 1; const PRIORITY_USER_KEEP: i32 = 2; -const ERROR_TPS: f64 = 10.0; // Default error TPS target -const ERROR_SAMPLE_RATE: f64 = 1.0; // Default extra sample rate +const ERROR_SAMPLE_RATE: f64 = 1.0; // Default extra sample rate (matches agent's ExtraSampleRate) // Sampling metadata keys / values (matching datadog-agent where applicable). 
const SAMPLING_PRIORITY_METRIC_KEY: &str = "_sampling_priority_v1"; @@ -90,7 +89,10 @@ impl TransformBuilder for TraceSamplerConfiguration { error_sampling_enabled: self.apm_config.error_sampling_enabled(), error_tracking_standalone: self.apm_config.error_tracking_standalone_enabled(), probabilistic_sampler_enabled: self.apm_config.probabilistic_sampler_enabled(), - error_sampler: errors::ErrorsSampler::new(ERROR_TPS, ERROR_SAMPLE_RATE), + error_sampler: errors::ErrorsSampler::new( + self.apm_config.errors_per_second(), + ERROR_SAMPLE_RATE, + ), }; Ok(Box::new(sampler)) @@ -376,6 +378,7 @@ impl Transform for TraceSampler { // decision_maker is the tag that indicates the decision maker (probabilistic, error, etc.) // root_span_idx is the index of the root span of the trace let (keep, priority, decision_maker, root_span_idx) = self.run_samplers(&mut trace); + info!("WACKTEST keep is {}", keep); if keep { if let Some(root_idx) = root_span_idx { self.apply_sampling_metadata( @@ -392,6 +395,7 @@ impl Transform for TraceSampler { .dispatcher() .buffered() .expect("default output should always exist"); + dispatcher.push(Event::Trace(trace)).await?; dispatcher.flush().await?; } else if !self.error_tracking_standalone { From 97e170184556f0b5533cbc3ac3471996432f63da Mon Sep 17 00:00:00 2001 From: andrewqian2001datadog Date: Mon, 26 Jan 2026 11:17:56 -0500 Subject: [PATCH 31/53] turn off error sampling by default (the agent has this on by default) --- lib/saluki-components/src/common/datadog/apm.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/saluki-components/src/common/datadog/apm.rs b/lib/saluki-components/src/common/datadog/apm.rs index dd3c846e53..5b19df6956 100644 --- a/lib/saluki-components/src/common/datadog/apm.rs +++ b/lib/saluki-components/src/common/datadog/apm.rs @@ -15,7 +15,7 @@ const fn default_sampling_percentage() -> f64 { } const fn default_error_sampling_enabled() -> bool { - true + false } const fn 
default_error_tracking_standalone_enabled() -> bool { From 6c2b03949e89b4dd19fa6cc04f1c1b22781aa45c Mon Sep 17 00:00:00 2001 From: andrewqian2001datadog Date: Mon, 26 Jan 2026 11:20:18 -0500 Subject: [PATCH 32/53] make fmt --- lib/saluki-components/src/common/datadog/apm.rs | 2 +- lib/saluki-components/src/transforms/trace_sampler/mod.rs | 7 ++----- 2 files changed, 3 insertions(+), 6 deletions(-) diff --git a/lib/saluki-components/src/common/datadog/apm.rs b/lib/saluki-components/src/common/datadog/apm.rs index 5b19df6956..77f87213c6 100644 --- a/lib/saluki-components/src/common/datadog/apm.rs +++ b/lib/saluki-components/src/common/datadog/apm.rs @@ -15,7 +15,7 @@ const fn default_sampling_percentage() -> f64 { } const fn default_error_sampling_enabled() -> bool { - false + false } const fn default_error_tracking_standalone_enabled() -> bool { diff --git a/lib/saluki-components/src/transforms/trace_sampler/mod.rs b/lib/saluki-components/src/transforms/trace_sampler/mod.rs index b73c2d5978..29b5c7dc55 100644 --- a/lib/saluki-components/src/transforms/trace_sampler/mod.rs +++ b/lib/saluki-components/src/transforms/trace_sampler/mod.rs @@ -89,10 +89,7 @@ impl TransformBuilder for TraceSamplerConfiguration { error_sampling_enabled: self.apm_config.error_sampling_enabled(), error_tracking_standalone: self.apm_config.error_tracking_standalone_enabled(), probabilistic_sampler_enabled: self.apm_config.probabilistic_sampler_enabled(), - error_sampler: errors::ErrorsSampler::new( - self.apm_config.errors_per_second(), - ERROR_SAMPLE_RATE, - ), + error_sampler: errors::ErrorsSampler::new(self.apm_config.errors_per_second(), ERROR_SAMPLE_RATE), }; Ok(Box::new(sampler)) @@ -395,7 +392,7 @@ impl Transform for TraceSampler { .dispatcher() .buffered() .expect("default output should always exist"); - + dispatcher.push(Event::Trace(trace)).await?; dispatcher.flush().await?; } else if !self.error_tracking_standalone { From 3aa3e311883f2d1f39a6a7b6bf506d0463434765 Mon Sep 17 00:00:00 
2001 From: andrewqian2001datadog Date: Mon, 26 Jan 2026 11:37:34 -0500 Subject: [PATCH 33/53] oops remove print --- lib/saluki-components/src/transforms/trace_sampler/mod.rs | 2 -- 1 file changed, 2 deletions(-) diff --git a/lib/saluki-components/src/transforms/trace_sampler/mod.rs b/lib/saluki-components/src/transforms/trace_sampler/mod.rs index 29b5c7dc55..fa44a92c30 100644 --- a/lib/saluki-components/src/transforms/trace_sampler/mod.rs +++ b/lib/saluki-components/src/transforms/trace_sampler/mod.rs @@ -28,7 +28,6 @@ use saluki_error::GenericError; use stringtheory::MetaString; use tokio::select; use tracing::debug; -use tracing::info; mod core_sampler; mod errors; @@ -375,7 +374,6 @@ impl Transform for TraceSampler { // decision_maker is the tag that indicates the decision maker (probabilistic, error, etc.) // root_span_idx is the index of the root span of the trace let (keep, priority, decision_maker, root_span_idx) = self.run_samplers(&mut trace); - info!("WACKTEST keep is {}", keep); if keep { if let Some(root_idx) = root_span_idx { self.apply_sampling_metadata( From f16ccf81f0438afb87b6b70347b38405e1add8ab Mon Sep 17 00:00:00 2001 From: andrewqian2001datadog Date: Mon, 26 Jan 2026 12:13:57 -0500 Subject: [PATCH 34/53] nvm change it back to true --- lib/saluki-components/src/common/datadog/apm.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/saluki-components/src/common/datadog/apm.rs b/lib/saluki-components/src/common/datadog/apm.rs index 77f87213c6..f1e98d9dc8 100644 --- a/lib/saluki-components/src/common/datadog/apm.rs +++ b/lib/saluki-components/src/common/datadog/apm.rs @@ -15,7 +15,7 @@ const fn default_sampling_percentage() -> f64 { } const fn default_error_sampling_enabled() -> bool { - false + true } const fn default_error_tracking_standalone_enabled() -> bool { From 8cde3f84e5d4a6273e087edbad091fddd5b92c88 Mon Sep 17 00:00:00 2001 From: andrewqian2001datadog Date: Mon, 26 Jan 2026 12:21:35 -0500 Subject: [PATCH 35/53] move 
constant to shared folder --- lib/saluki-components/src/common/datadog/apm.rs | 2 +- lib/saluki-components/src/common/datadog/mod.rs | 3 +++ .../src/common/otlp/traces/transform.rs | 2 +- .../src/common/otlp/traces/translator.rs | 3 ++- .../src/encoders/datadog/traces/mod.rs | 1 - .../src/transforms/trace_sampler/mod.rs | 13 ++++++------- 6 files changed, 13 insertions(+), 11 deletions(-) diff --git a/lib/saluki-components/src/common/datadog/apm.rs b/lib/saluki-components/src/common/datadog/apm.rs index f1e98d9dc8..dd3c846e53 100644 --- a/lib/saluki-components/src/common/datadog/apm.rs +++ b/lib/saluki-components/src/common/datadog/apm.rs @@ -15,7 +15,7 @@ const fn default_sampling_percentage() -> f64 { } const fn default_error_sampling_enabled() -> bool { - true + true } const fn default_error_tracking_standalone_enabled() -> bool { diff --git a/lib/saluki-components/src/common/datadog/mod.rs b/lib/saluki-components/src/common/datadog/mod.rs index a29b6f5976..823e8b2bde 100644 --- a/lib/saluki-components/src/common/datadog/mod.rs +++ b/lib/saluki-components/src/common/datadog/mod.rs @@ -9,6 +9,9 @@ mod retry; pub mod telemetry; pub mod transaction; +/// Metric key used to store Datadog sampling priority (`_sampling_priority_v1`). +pub const SAMPLING_PRIORITY_METRIC_KEY: &str = "_sampling_priority_v1"; + /// Default compressed size limit for intake requests. 
pub const DEFAULT_INTAKE_COMPRESSED_SIZE_LIMIT: usize = 3_200_000; // 3 MiB diff --git a/lib/saluki-components/src/common/otlp/traces/transform.rs b/lib/saluki-components/src/common/otlp/traces/transform.rs index 2342091da0..224253fe2b 100644 --- a/lib/saluki-components/src/common/otlp/traces/transform.rs +++ b/lib/saluki-components/src/common/otlp/traces/transform.rs @@ -20,6 +20,7 @@ use serde_json::{Map as JsonMap, Value as JsonValue}; use stringtheory::MetaString; use tracing::error; +use crate::common::datadog::SAMPLING_PRIORITY_METRIC_KEY; use crate::common::otlp::attributes::{get_int_attribute, HTTP_MAPPINGS}; use crate::common::otlp::traces::normalize::{normalize_service, normalize_tag_value}; use crate::common::otlp::traces::normalize::{truncate_utf8, MAX_RESOURCE_LEN}; @@ -29,7 +30,6 @@ use crate::common::otlp::util::{ DEPLOYMENT_ENVIRONMENT_KEY, KEY_DATADOG_CONTAINER_ID, KEY_DATADOG_ENVIRONMENT, KEY_DATADOG_VERSION, }; -pub(crate) const SAMPLING_PRIORITY_METRIC_KEY: &str = "_sampling_priority_v1"; const EVENT_EXTRACTION_METRIC_KEY: &str = "_dd1.sr.eausr"; const ANALYTICS_EVENT_KEY: &str = "analytics.event"; const HTTP_REQUEST_HEADER_PREFIX: &str = "http.request.header."; diff --git a/lib/saluki-components/src/common/otlp/traces/translator.rs b/lib/saluki-components/src/common/otlp/traces/translator.rs index 66d8bd0ad0..1116c20fce 100644 --- a/lib/saluki-components/src/common/otlp/traces/translator.rs +++ b/lib/saluki-components/src/common/otlp/traces/translator.rs @@ -6,6 +6,7 @@ use saluki_context::tags::TagSet; use saluki_core::data_model::event::trace::{Span as DdSpan, Trace, TraceSampling}; use saluki_core::data_model::event::Event; +use crate::common::datadog::SAMPLING_PRIORITY_METRIC_KEY; use crate::common::otlp::config::TracesConfig; use crate::common::otlp::traces::transform::otel_span_to_dd_span; use crate::common::otlp::traces::transform::otlp_value_to_string; @@ -70,7 +71,7 @@ impl OtlpTracesTranslator { ); // Track last-seen priority for this 
trace (overwrites previous values) - if let Some(&priority) = dd_span.metrics().get("_sampling_priority_v1") { + if let Some(&priority) = dd_span.metrics().get(SAMPLING_PRIORITY_METRIC_KEY) { priorities_by_id.insert(trace_id, priority as i32); } diff --git a/lib/saluki-components/src/encoders/datadog/traces/mod.rs b/lib/saluki-components/src/encoders/datadog/traces/mod.rs index 98231b828d..91a21ca9a4 100644 --- a/lib/saluki-components/src/encoders/datadog/traces/mod.rs +++ b/lib/saluki-components/src/encoders/datadog/traces/mod.rs @@ -65,7 +65,6 @@ static CONTENT_TYPE_PROTOBUF: HeaderValue = HeaderValue::from_static("applicatio // Sampling metadata keys / values. const TAG_OTLP_SAMPLING_RATE: &str = "_dd.otlp_sr"; const TAG_DECISION_MAKER: &str = "_dd.p.dm"; -const SAMPLING_PRIORITY_METRIC_KEY: &str = "_sampling_priority_v1"; const DEFAULT_CHUNK_PRIORITY: i32 = 1; // PRIORITY_AUTO_KEEP fn default_serializer_compressor_kind() -> String { diff --git a/lib/saluki-components/src/transforms/trace_sampler/mod.rs b/lib/saluki-components/src/transforms/trace_sampler/mod.rs index fa44a92c30..37b8ee3d0c 100644 --- a/lib/saluki-components/src/transforms/trace_sampler/mod.rs +++ b/lib/saluki-components/src/transforms/trace_sampler/mod.rs @@ -36,7 +36,7 @@ mod score_sampler; mod signature; use self::probabilistic::PROB_RATE_KEY; -use crate::common::datadog::apm::ApmConfig; +use crate::common::datadog::{apm::ApmConfig, SAMPLING_PRIORITY_METRIC_KEY}; // Sampling priority constants (matching datadog-agent) const PRIORITY_AUTO_DROP: i32 = 0; @@ -46,7 +46,6 @@ const PRIORITY_USER_KEEP: i32 = 2; const ERROR_SAMPLE_RATE: f64 = 1.0; // Default extra sample rate (matches agent's ExtraSampleRate) // Sampling metadata keys / values (matching datadog-agent where applicable). 
-const SAMPLING_PRIORITY_METRIC_KEY: &str = "_sampling_priority_v1"; const TAG_DECISION_MAKER: &str = "_dd.p.dm"; // Single Span Sampling and Analytics Events keys @@ -538,7 +537,7 @@ mod tests { // Test trace with user-set priority = 2 (UserKeep) let mut metrics = HashMap::new(); - metrics.insert("_sampling_priority_v1".to_string(), 2.0); + metrics.insert(SAMPLING_PRIORITY_METRIC_KEY.to_string(), 2.0); let span = create_test_span_with_metrics(12345, 1, metrics); let trace = create_test_trace(vec![span]); let root_idx = sampler.get_root_span_index(&trace).unwrap(); @@ -547,7 +546,7 @@ mod tests { // Test trace with user-set priority = -1 (UserDrop) let mut metrics = HashMap::new(); - metrics.insert("_sampling_priority_v1".to_string(), -1.0); + metrics.insert(SAMPLING_PRIORITY_METRIC_KEY.to_string(), -1.0); let span = create_test_span_with_metrics(12345, 1, metrics); let trace = create_test_trace(vec![span]); let root_idx = sampler.get_root_span_index(&trace).unwrap(); @@ -569,11 +568,11 @@ mod tests { // Test trace-level priority overrides span priorities (last-seen priority) // Create spans with different priorities - root has 0, later span has 2 let mut metrics_root = HashMap::new(); - metrics_root.insert("_sampling_priority_v1".to_string(), 0.0); + metrics_root.insert(SAMPLING_PRIORITY_METRIC_KEY.to_string(), 0.0); let root_span = create_test_span_with_metrics(12345, 1, metrics_root); let mut metrics_later = HashMap::new(); - metrics_later.insert("_sampling_priority_v1".to_string(), 1.0); + metrics_later.insert(SAMPLING_PRIORITY_METRIC_KEY.to_string(), 1.0); let later_span = create_test_span_with_metrics(12345, 2, metrics_later).with_parent_id(1); let mut trace = create_test_trace(vec![root_span, later_span]); @@ -680,7 +679,7 @@ mod tests { sampler.probabilistic_sampler_enabled = false; // Use legacy path let mut metrics = HashMap::new(); - metrics.insert("_sampling_priority_v1".to_string(), 2.0); + metrics.insert(SAMPLING_PRIORITY_METRIC_KEY.to_string(), 2.0); 
let span = create_test_span_with_metrics(12345, 1, metrics); let mut trace = create_test_trace(vec![span]); From 96f28a9a5d37d82caa6012e8af7b29544f9eb0a2 Mon Sep 17 00:00:00 2001 From: andrewqian2001datadog Date: Tue, 27 Jan 2026 14:47:41 -0500 Subject: [PATCH 36/53] add unit tests for errors sampler --- .../src/transforms/trace_sampler/errors.rs | 220 ++++++++++++++++++ .../transforms/trace_sampler/score_sampler.rs | 15 ++ 2 files changed, 235 insertions(+) diff --git a/lib/saluki-components/src/transforms/trace_sampler/errors.rs b/lib/saluki-components/src/transforms/trace_sampler/errors.rs index 5518c289d3..e300c0d651 100644 --- a/lib/saluki-components/src/transforms/trace_sampler/errors.rs +++ b/lib/saluki-components/src/transforms/trace_sampler/errors.rs @@ -34,3 +34,223 @@ impl ErrorsSampler { self.score_sampler.sample(now, trace, root_idx) } } + +#[cfg(test)] +mod tests { + // logic for these tests are taken from here: https://github.com/DataDog/datadog-agent/blob/main/pkg/trace/sampler/scoresampler_test.go#L23 + use std::time::{Duration, SystemTime}; + + use saluki_context::tags::TagSet; + use saluki_core::data_model::event::trace::{Span, Trace}; + use stringtheory::MetaString; + + use super::*; + use crate::transforms::trace_sampler::signature::{compute_signature_with_root_and_env, Signature}; + + const BUCKET_DURATION: Duration = Duration::from_secs(5); + + /// Create a test ErrorsSampler with the given TPS configuration. + fn get_test_errors_sampler(tps: f64) -> ErrorsSampler { + // No extra fixed sampling, no maximum TPS + ErrorsSampler::new(tps, 1.0) + } + + /// Create a test trace with deterministic IDs. 
+ fn get_test_trace(trace_id: u64) -> (Trace, usize) { + // Root span + let root = Span::new( + MetaString::from("mcnulty"), + MetaString::from("GET /api"), + MetaString::from("resource"), + MetaString::from("web"), + trace_id, + 1, // span_id + 0, // parent_id + 42, // start + 1000000, // duration + 0, // error + ); + + // Child span + let child = Span::new( + MetaString::from("mcnulty"), + MetaString::from("SELECT * FROM users"), + MetaString::from("resource"), + MetaString::from("sql"), + trace_id, + 2, // span_id + 1, // parent_id + 100, // start + 200000, // duration + 0, // error + ); + + let trace = Trace::new(vec![root, child], TagSet::default()); + (trace, 0) // Root is at index 0 + } + + /// Create a test trace with error in root span. + fn get_test_trace_with_error(trace_id: u64) -> (Trace, usize) { + // Root span with error + let root = Span::new( + MetaString::from("mcnulty"), + MetaString::from("GET /api"), + MetaString::from("resource"), + MetaString::from("web"), + trace_id, + 1, // span_id + 0, // parent_id + 42, // start + 1000000, // duration + 1, // error = 1 + ); + + // Child span + let child = Span::new( + MetaString::from("mcnulty"), + MetaString::from("SELECT * FROM users"), + MetaString::from("resource"), + MetaString::from("sql"), + trace_id, + 2, // span_id + 1, // parent_id + 100, // start + 200000, // duration + 0, // error + ); + + let trace = Trace::new(vec![root, child], TagSet::default()); + (trace, 0) // Root is at index 0 + } + + #[test] + fn test_shrink() { + // Test that shrink preserves first signatures and collapses later ones. + // The shrink logic activates when size() >= SHRINK_CARDINALITY/2 (100). + // When it activates, it builds an allow-list from the current `rates` map. + // Since `shrink` runs before `count_weighted_sig` in the sample flow, we must + // advance time one iteration BEFORE hitting the threshold so that `update_rates` + // runs and populates `rates` with the first batch of signatures. 
+ let mut sampler = get_test_errors_sampler(10.0); + let test_time = SystemTime::now(); + let shrink_cardinality = ScoreSampler::test_shrink_cardinality(); + let threshold = shrink_cardinality / 2; // 100 + + let mut sigs = Vec::new(); + // Generate 3*shrinkCardinality signatures with different services + for i in 0..(3 * shrink_cardinality) { + let (mut trace, root_idx) = get_test_trace(3); + let spans = trace.spans_mut(); + // modify the non root span to create unique signatures + spans[1] = spans[1].clone().with_service(MetaString::from(format!("service_{}", i + 1000))); + + let signature = compute_signature_with_root_and_env(&trace, root_idx); + sigs.push(signature); + + // Advance time at threshold-1 so update_rates runs BEFORE shrink activates. + // This populates rates with (threshold-1) signatures. + let sample_time = if i >= threshold - 1 { + test_time + BUCKET_DURATION + } else { + test_time + }; + sampler.sample_error(sample_time, &mut trace, root_idx); + } + + // Verify first (threshold-1) signatures are preserved (they're in the allow-list) + let threshold = shrink_cardinality / 2; + for i in 0..(threshold - 1) { + assert_eq!( + sigs[i], + sampler.score_sampler.test_shrink(sigs[i]), + "Signature at index {} should be preserved", + i + ); + } + + // Verify signatures from 2*shrinkCardinality onwards are shrunk + for i in (2 * shrink_cardinality)..(3 * shrink_cardinality - 1) { + let expected = Signature(sigs[i].0 % (threshold as u64)); + assert_eq!( + expected, + sampler.score_sampler.test_shrink(sigs[i]), + "Signature at index {} should be shrunk", + i + ); + } + + // Final size should be bounded by shrink_cardinality + let size = sampler.score_sampler.test_size(); + assert!( + size <= shrink_cardinality as i64, + "Size {} should be <= {}", + size, + shrink_cardinality + ); + } + + #[test] + fn test_disable() { + // Create a disabled sampler (TPS = 0) + let mut sampler = get_test_errors_sampler(0.0); + + // The sampler should never sample anything + 
for i in 0..100 { + let (mut trace, root_idx) = get_test_trace_with_error(i); + let sampled = sampler.sample_error(SystemTime::now(), &mut trace, root_idx); + assert!(!sampled, "Disabled sampler should never sample (iteration {})", i); + } + } + + #[test] + fn test_target_tps() { + // Test the effectiveness of the targetTPS option + // logic taken from here: https://github.com/DataDog/datadog-agent/blob/main/pkg/trace/sampler/scoresampler_test.go#L102 + let target_tps = 10.0; + let mut sampler = get_test_errors_sampler(target_tps); + + let generated_tps = 200.0; + let init_periods = 2; + let periods = 10; + + let period_seconds = BUCKET_DURATION.as_secs() as f64; + let traces_per_period = (generated_tps * period_seconds) as usize; + + let mut sampled_count = 0; + let mut test_time = SystemTime::now(); + + for period in 0..(init_periods + periods) { + test_time = test_time + BUCKET_DURATION; + + for i in 0..traces_per_period { + let (mut trace, root_idx) = get_test_trace_with_error( + (period * traces_per_period + i) as u64 + ); + let sampled = sampler.sample_error(test_time, &mut trace, root_idx); + + // Once we got into the stable regime, count the samples + if period >= init_periods && sampled { + sampled_count += 1; + } + } + } + + // We should keep approximately the right percentage of traces + let expected_ratio = target_tps / generated_tps; + let actual_ratio = sampled_count as f64 / (traces_per_period as f64 * periods as f64); + + assert!( + (actual_ratio - expected_ratio).abs() / expected_ratio < 0.2, + "Expected ratio {:.4}, got {:.4} (sampled {} out of {})", + expected_ratio, actual_ratio, sampled_count, traces_per_period * periods + ); + + // We should have a throughput of sampled traces around targetTPS + let actual_tps = sampled_count as f64 / (periods as f64 * BUCKET_DURATION.as_secs() as f64); + assert!( + (actual_tps - target_tps).abs() / target_tps < 0.2, + "Expected TPS {:.2}, got {:.2}", + target_tps, actual_tps + ); + } +} diff --git 
a/lib/saluki-components/src/transforms/trace_sampler/score_sampler.rs b/lib/saluki-components/src/transforms/trace_sampler/score_sampler.rs index 9eb918aba2..c8724deb54 100644 --- a/lib/saluki-components/src/transforms/trace_sampler/score_sampler.rs +++ b/lib/saluki-components/src/transforms/trace_sampler/score_sampler.rs @@ -135,6 +135,21 @@ impl ScoreSampler { } } +#[cfg(test)] +impl ScoreSampler { + pub(crate) fn test_shrink(&mut self, sig: Signature) -> Signature { + self.shrink(sig) + } + + pub(crate) fn test_size(&self) -> i64 { + self.sampler.size() + } + + pub(crate) fn test_shrink_cardinality() -> usize { + SHRINK_CARDINALITY + } +} + /// Calculate the weight from the span's global rate and presampler rate. fn weight_root(span: &Span) -> f32 { let client_rate = span From 7a8f5404cac49eb2abd416f743a79a115d99f6dc Mon Sep 17 00:00:00 2001 From: andrewqian2001datadog Date: Tue, 27 Jan 2026 14:54:23 -0500 Subject: [PATCH 37/53] make check clippy and fmt --- .../src/transforms/trace_sampler/errors.rs | 69 +++++++++++-------- 1 file changed, 39 insertions(+), 30 deletions(-) diff --git a/lib/saluki-components/src/transforms/trace_sampler/errors.rs b/lib/saluki-components/src/transforms/trace_sampler/errors.rs index e300c0d651..805a741d7f 100644 --- a/lib/saluki-components/src/transforms/trace_sampler/errors.rs +++ b/lib/saluki-components/src/transforms/trace_sampler/errors.rs @@ -64,11 +64,11 @@ mod tests { MetaString::from("resource"), MetaString::from("web"), trace_id, - 1, // span_id - 0, // parent_id - 42, // start + 1, // span_id + 0, // parent_id + 42, // start 1000000, // duration - 0, // error + 0, // error ); // Child span @@ -78,11 +78,11 @@ mod tests { MetaString::from("resource"), MetaString::from("sql"), trace_id, - 2, // span_id - 1, // parent_id - 100, // start + 2, // span_id + 1, // parent_id + 100, // start 200000, // duration - 0, // error + 0, // error ); let trace = Trace::new(vec![root, child], TagSet::default()); @@ -98,11 +98,11 @@ mod 
tests { MetaString::from("resource"), MetaString::from("web"), trace_id, - 1, // span_id - 0, // parent_id - 42, // start + 1, // span_id + 0, // parent_id + 42, // start 1000000, // duration - 1, // error = 1 + 1, // error = 1 ); // Child span @@ -112,11 +112,11 @@ mod tests { MetaString::from("resource"), MetaString::from("sql"), trace_id, - 2, // span_id - 1, // parent_id - 100, // start + 2, // span_id + 1, // parent_id + 100, // start 200000, // duration - 0, // error + 0, // error ); let trace = Trace::new(vec![root, child], TagSet::default()); @@ -125,7 +125,7 @@ mod tests { #[test] fn test_shrink() { - // Test that shrink preserves first signatures and collapses later ones. + // Test that shrink preserves first signatures and collapses later ones. // The shrink logic activates when size() >= SHRINK_CARDINALITY/2 (100). // When it activates, it builds an allow-list from the current `rates` map. // Since `shrink` runs before `count_weighted_sig` in the sample flow, we must @@ -142,7 +142,9 @@ mod tests { let (mut trace, root_idx) = get_test_trace(3); let spans = trace.spans_mut(); // modify the non root span to create unique signatures - spans[1] = spans[1].clone().with_service(MetaString::from(format!("service_{}", i + 1000))); + spans[1] = spans[1] + .clone() + .with_service(MetaString::from(format!("service_{}", i + 1000))); let signature = compute_signature_with_root_and_env(&trace, root_idx); sigs.push(signature); @@ -159,21 +161,26 @@ mod tests { // Verify first (threshold-1) signatures are preserved (they're in the allow-list) let threshold = shrink_cardinality / 2; - for i in 0..(threshold - 1) { + for (i, sig) in sigs.iter().enumerate().take(threshold - 1) { assert_eq!( - sigs[i], - sampler.score_sampler.test_shrink(sigs[i]), + *sig, + sampler.score_sampler.test_shrink(*sig), "Signature at index {} should be preserved", i ); } // Verify signatures from 2*shrinkCardinality onwards are shrunk - for i in (2 * shrink_cardinality)..(3 * shrink_cardinality 
- 1) { - let expected = Signature(sigs[i].0 % (threshold as u64)); + for (i, sig) in sigs + .iter() + .enumerate() + .skip(2 * shrink_cardinality) + .take(shrink_cardinality - 1) + { + let expected = Signature(sig.0 % (threshold as u64)); assert_eq!( expected, - sampler.score_sampler.test_shrink(sigs[i]), + sampler.score_sampler.test_shrink(*sig), "Signature at index {} should be shrunk", i ); @@ -220,12 +227,10 @@ mod tests { let mut test_time = SystemTime::now(); for period in 0..(init_periods + periods) { - test_time = test_time + BUCKET_DURATION; + test_time += BUCKET_DURATION; for i in 0..traces_per_period { - let (mut trace, root_idx) = get_test_trace_with_error( - (period * traces_per_period + i) as u64 - ); + let (mut trace, root_idx) = get_test_trace_with_error((period * traces_per_period + i) as u64); let sampled = sampler.sample_error(test_time, &mut trace, root_idx); // Once we got into the stable regime, count the samples @@ -242,7 +247,10 @@ mod tests { assert!( (actual_ratio - expected_ratio).abs() / expected_ratio < 0.2, "Expected ratio {:.4}, got {:.4} (sampled {} out of {})", - expected_ratio, actual_ratio, sampled_count, traces_per_period * periods + expected_ratio, + actual_ratio, + sampled_count, + traces_per_period * periods ); // We should have a throughput of sampled traces around targetTPS @@ -250,7 +258,8 @@ mod tests { assert!( (actual_tps - target_tps).abs() / target_tps < 0.2, "Expected TPS {:.2}, got {:.2}", - target_tps, actual_tps + target_tps, + actual_tps ); } } From 5fce43765d6814c7c5f497fc6fcce80f0d9dfdbd Mon Sep 17 00:00:00 2001 From: andrewqian2001datadog Date: Tue, 27 Jan 2026 15:27:31 -0500 Subject: [PATCH 38/53] change probabilistic sampling to on by default --- lib/saluki-components/src/common/datadog/apm.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/saluki-components/src/common/datadog/apm.rs b/lib/saluki-components/src/common/datadog/apm.rs index dd3c846e53..915b075d13 100644 --- 
a/lib/saluki-components/src/common/datadog/apm.rs +++ b/lib/saluki-components/src/common/datadog/apm.rs @@ -23,7 +23,7 @@ const fn default_error_tracking_standalone_enabled() -> bool { } const fn default_probabilistic_sampling_enabled() -> bool { - false + true } const fn default_peer_tags_aggregation() -> bool { true @@ -53,7 +53,7 @@ struct ProbabilisticSamplerConfig { /// When enabled, the trace sampler keeps approximately `sampling_percentage` of traces using a /// deterministic hash of the trace ID. /// - /// Defaults to `false`. + /// Defaults to `true`. #[serde(default = "default_probabilistic_sampling_enabled")] enabled: bool, From 6b3dc7dffe9f9ac911261cdddf75741531bf1469 Mon Sep 17 00:00:00 2001 From: andrewqian2001datadog Date: Tue, 27 Jan 2026 15:35:03 -0500 Subject: [PATCH 39/53] make fmt --- lib/saluki-components/src/common/datadog/apm.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/saluki-components/src/common/datadog/apm.rs b/lib/saluki-components/src/common/datadog/apm.rs index 915b075d13..c9e0a76874 100644 --- a/lib/saluki-components/src/common/datadog/apm.rs +++ b/lib/saluki-components/src/common/datadog/apm.rs @@ -23,7 +23,7 @@ const fn default_error_tracking_standalone_enabled() -> bool { } const fn default_probabilistic_sampling_enabled() -> bool { - true + true } const fn default_peer_tags_aggregation() -> bool { true From c529497df77e995aedc68d57e0c0479be721adba Mon Sep 17 00:00:00 2001 From: andrewqian2001datadog Date: Tue, 27 Jan 2026 16:16:46 -0500 Subject: [PATCH 40/53] make fmt --- bin/agent-data-plane/src/cli/run.rs | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/bin/agent-data-plane/src/cli/run.rs b/bin/agent-data-plane/src/cli/run.rs index a32c1a55b2..62786cd0d5 100644 --- a/bin/agent-data-plane/src/cli/run.rs +++ b/bin/agent-data-plane/src/cli/run.rs @@ -25,8 +25,7 @@ use saluki_components::{ transforms::{ AggregateConfiguration, ApmStatsTransformConfiguration, ChainedConfiguration, 
DogstatsDMapperConfiguration, DogstatsDPrefixFilterConfiguration, HostEnrichmentConfiguration, HostTagsConfiguration, - TraceSamplerConfiguration, - TraceObfuscationConfiguration, + TraceObfuscationConfiguration, TraceSamplerConfiguration, }, }; use saluki_config::{ConfigurationLoader, GenericConfiguration}; From e37b5f5c92112639da5af7f04add75d7f734af91 Mon Sep 17 00:00:00 2001 From: andrewqian2001datadog Date: Tue, 27 Jan 2026 16:37:27 -0500 Subject: [PATCH 41/53] change probabilistic back to false --- lib/saluki-components/src/common/datadog/apm.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/saluki-components/src/common/datadog/apm.rs b/lib/saluki-components/src/common/datadog/apm.rs index 8f2879b44e..08c01c624f 100644 --- a/lib/saluki-components/src/common/datadog/apm.rs +++ b/lib/saluki-components/src/common/datadog/apm.rs @@ -25,7 +25,7 @@ const fn default_error_tracking_standalone_enabled() -> bool { } const fn default_probabilistic_sampling_enabled() -> bool { - true + false } const fn default_peer_tags_aggregation() -> bool { true @@ -55,7 +55,7 @@ struct ProbabilisticSamplerConfig { /// When enabled, the trace sampler keeps approximately `sampling_percentage` of traces using a /// deterministic hash of the trace ID. /// - /// Defaults to `true`. + /// Defaults to `false`. 
#[serde(default = "default_probabilistic_sampling_enabled")] enabled: bool, From e7ec2476f6670302f3a0a4276c1a3da8f128c6b1 Mon Sep 17 00:00:00 2001 From: andrewqian2001datadog Date: Tue, 27 Jan 2026 17:00:18 -0500 Subject: [PATCH 42/53] add no priority sampler --- .../src/transforms/trace_sampler/mod.rs | 15 +++++++++--- .../transforms/trace_sampler/score_sampler.rs | 23 ++++++++++++++++++- 2 files changed, 34 insertions(+), 4 deletions(-) diff --git a/lib/saluki-components/src/transforms/trace_sampler/mod.rs b/lib/saluki-components/src/transforms/trace_sampler/mod.rs index 37b8ee3d0c..9eb3d07b42 100644 --- a/lib/saluki-components/src/transforms/trace_sampler/mod.rs +++ b/lib/saluki-components/src/transforms/trace_sampler/mod.rs @@ -88,6 +88,10 @@ impl TransformBuilder for TraceSamplerConfiguration { error_tracking_standalone: self.apm_config.error_tracking_standalone_enabled(), probabilistic_sampler_enabled: self.apm_config.probabilistic_sampler_enabled(), error_sampler: errors::ErrorsSampler::new(self.apm_config.errors_per_second(), ERROR_SAMPLE_RATE), + no_priority_sampler: score_sampler::NoPrioritySampler::new( + self.apm_config.target_traces_per_second(), + ERROR_SAMPLE_RATE, + ), }; Ok(Box::new(sampler)) @@ -106,6 +110,7 @@ pub struct TraceSampler { error_sampling_enabled: bool, probabilistic_sampler_enabled: bool, error_sampler: errors::ErrorsSampler, + no_priority_sampler: score_sampler::NoPrioritySampler, } impl TraceSampler { @@ -301,11 +306,14 @@ impl TraceSampler { return (prob_keep, priority, decision_maker, Some(root_span_idx)); } - if let Some(user_priority) = self.get_user_priority(trace, root_span_idx) { - if user_priority > 0 { + let user_priority = self.get_user_priority(trace, root_span_idx); + if let Some(priority) = user_priority { + if priority > 0 { // User wants to keep this trace - return (true, user_priority, DECISION_MAKER_MANUAL_PRIORITY, Some(root_span_idx)); + return (true, priority, DECISION_MAKER_MANUAL_PRIORITY, 
Some(root_span_idx)); } + } else if self.no_priority_sampler.sample(now, trace, root_span_idx) { + return (true, PRIORITY_AUTO_KEEP, "", Some(root_span_idx)); } if self.error_sampling_enabled && self.trace_contains_error(trace, false) { @@ -491,6 +499,7 @@ mod tests { error_tracking_standalone: false, probabilistic_sampler_enabled: true, error_sampler: errors::ErrorsSampler::new(10.0, 1.0), + no_priority_sampler: score_sampler::NoPrioritySampler::new(10.0, 1.0), } } diff --git a/lib/saluki-components/src/transforms/trace_sampler/score_sampler.rs b/lib/saluki-components/src/transforms/trace_sampler/score_sampler.rs index c8724deb54..7d44803a85 100644 --- a/lib/saluki-components/src/transforms/trace_sampler/score_sampler.rs +++ b/lib/saluki-components/src/transforms/trace_sampler/score_sampler.rs @@ -13,6 +13,7 @@ const KEY_SAMPLING_RATE_PRE_SAMPLER: &str = "_dd1.sr.rapre"; // ScoreSampler-specific rate keys pub(super) const ERRORS_RATE_KEY: &str = "_dd.errors_sr"; +pub(super) const NO_PRIORITY_RATE_KEY: &str = "_dd.no_p_sr"; // shrinkCardinality is the max Signature cardinality before shrinking const SHRINK_CARDINALITY: usize = 200; @@ -33,7 +34,6 @@ const SAMPLER_HASHER: u64 = 1111111111111111111; /// # Missing /// /// TODO: Add SampleV1 method for legacy trace format support -/// TODO: Add NoPrioritySampler implementation pub struct ScoreSampler { sampler: Sampler, sampling_rate_key: &'static str, @@ -42,6 +42,27 @@ pub struct ScoreSampler { shrink_allow_list: Option>, } +/// NoPrioritySampler for traces without a sampling priority. +/// +/// Wraps a ScoreSampler configured specifically for no-priority sampling. +pub(super) struct NoPrioritySampler { + score_sampler: ScoreSampler, +} + +impl NoPrioritySampler { + /// Create a new NoPrioritySampler with the given configuration. 
+ pub(super) fn new(target_tps: f64, extra_sample_rate: f64) -> Self { + Self { + score_sampler: ScoreSampler::new(NO_PRIORITY_RATE_KEY, false, target_tps, extra_sample_rate), + } + } + + /// Evaluate a trace that has no sampling priority. + pub(super) fn sample(&mut self, now: SystemTime, trace: &mut Trace, root_idx: usize) -> bool { + self.score_sampler.sample(now, trace, root_idx) + } +} + impl ScoreSampler { /// Create a new ScoreSampler with the given sampling rate key and target TPS. pub fn new(sampling_rate_key: &'static str, disabled: bool, target_tps: f64, extra_sample_rate: f64) -> Self { From 7d0e8ccba8b935e7f409eef7cde969e0eb4ca96f Mon Sep 17 00:00:00 2001 From: andrewqian2001datadog Date: Wed, 28 Jan 2026 10:10:24 -0500 Subject: [PATCH 43/53] ???? --- .../src/encoders/datadog/traces/mod.rs | 48 ++++++- .../src/transforms/trace_sampler/mod.rs | 121 +++++++++++++++++- 2 files changed, 165 insertions(+), 4 deletions(-) diff --git a/lib/saluki-components/src/encoders/datadog/traces/mod.rs b/lib/saluki-components/src/encoders/datadog/traces/mod.rs index 91a21ca9a4..0e7541767c 100644 --- a/lib/saluki-components/src/encoders/datadog/traces/mod.rs +++ b/lib/saluki-components/src/encoders/datadog/traces/mod.rs @@ -19,6 +19,7 @@ use saluki_config::GenericConfiguration; use saluki_context::tags::TagSet; use saluki_core::data_model::event::trace::{ AttributeScalarValue, AttributeValue, Span as DdSpan, SpanEvent as DdSpanEvent, SpanLink as DdSpanLink, + TraceSampling, }; use saluki_core::topology::{EventsBuffer, PayloadsBuffer}; use saluki_core::{ @@ -65,7 +66,12 @@ static CONTENT_TYPE_PROTOBUF: HeaderValue = HeaderValue::from_static("applicatio // Sampling metadata keys / values. 
const TAG_OTLP_SAMPLING_RATE: &str = "_dd.otlp_sr"; const TAG_DECISION_MAKER: &str = "_dd.p.dm"; +const DECISION_MAKER_PROBABILISTIC: &str = "-9"; const DEFAULT_CHUNK_PRIORITY: i32 = 1; // PRIORITY_AUTO_KEEP +const OTEL_TRACE_ID_META_KEY: &str = "otel.trace_id"; +const MAX_TRACE_ID: u64 = u64::MAX; +const MAX_TRACE_ID_FLOAT: f64 = MAX_TRACE_ID as f64; +const SAMPLER_HASHER: u64 = 1111111111111111111; fn default_serializer_compressor_kind() -> String { "zstd".to_string() @@ -551,6 +557,27 @@ impl TraceEndpointEncoder { rate } + fn should_add_otlp_decision_maker(&self, trace: &Trace, sampling: &TraceSampling) -> bool { + if self.apm_config.probabilistic_sampler_enabled() { + return false; + } + if sampling.decision_maker.is_some() { + return false; + } + let priority = sampling.priority.unwrap_or(DEFAULT_CHUNK_PRIORITY); + if sampling.dropped_trace || priority <= 0 { + return false; + } + if !trace_has_otel_trace_id(trace) { + return false; + } + let trace_id = match trace.spans().first() { + Some(span) => span.trace_id(), + None => return false, + }; + sample_by_rate(trace_id, self.sampling_rate()) + } + fn build_trace_chunk(&self, trace: &Trace) -> TraceChunk { let spans: Vec = trace.spans().iter().map(convert_span).collect(); let mut chunk = TraceChunk::new(); @@ -564,9 +591,12 @@ impl TraceEndpointEncoder { chunk.set_priority(sampling.priority.unwrap_or(DEFAULT_CHUNK_PRIORITY)); chunk.set_droppedTrace(sampling.dropped_trace); - // Set decision maker tag if present + // Set decision maker tag if present, otherwise align OTLP behavior with trace-agent's + // OTLPReceiver when probabilistic sampling is disabled. 
if let Some(dm) = &sampling.decision_maker { tags.insert(TAG_DECISION_MAKER.to_string(), dm.to_string()); + } else if self.should_add_otlp_decision_maker(trace, sampling) { + tags.insert(TAG_DECISION_MAKER.to_string(), DECISION_MAKER_PROBABILISTIC.to_string()); } // Set OTLP sampling rate tag if present (from sampler) @@ -626,6 +656,22 @@ impl EndpointEncoder for TraceEndpointEncoder { } } +fn trace_has_otel_trace_id(trace: &Trace) -> bool { + let Some(span) = trace.spans().first() else { + return false; + }; + span.meta() + .contains_key(&MetaString::from_static(OTEL_TRACE_ID_META_KEY)) +} + +fn sample_by_rate(trace_id: u64, rate: f64) -> bool { + if rate < 1.0 { + trace_id.wrapping_mul(SAMPLER_HASHER) < (rate * MAX_TRACE_ID_FLOAT) as u64 + } else { + true + } +} + fn convert_span(span: &DdSpan) -> ProtoSpan { let mut proto = ProtoSpan::new(); proto.set_service(span.service().to_string()); diff --git a/lib/saluki-components/src/transforms/trace_sampler/mod.rs b/lib/saluki-components/src/transforms/trace_sampler/mod.rs index 9eb3d07b42..1252ec2ffe 100644 --- a/lib/saluki-components/src/transforms/trace_sampler/mod.rs +++ b/lib/saluki-components/src/transforms/trace_sampler/mod.rs @@ -27,7 +27,7 @@ use saluki_core::{ use saluki_error::GenericError; use stringtheory::MetaString; use tokio::select; -use tracing::debug; +use tracing::{debug, info}; mod core_sampler; mod errors; @@ -37,6 +37,7 @@ mod signature; use self::probabilistic::PROB_RATE_KEY; use crate::common::datadog::{apm::ApmConfig, SAMPLING_PRIORITY_METRIC_KEY}; +use crate::common::otlp::config::TracesConfig; // Sampling priority constants (matching datadog-agent) const PRIORITY_AUTO_DROP: i32 = 0; @@ -47,6 +48,7 @@ const ERROR_SAMPLE_RATE: f64 = 1.0; // Default extra sample rate (matches agent' // Sampling metadata keys / values (matching datadog-agent where applicable). 
const TAG_DECISION_MAKER: &str = "_dd.p.dm"; +const OTEL_TRACE_ID_META_KEY: &str = "otel.trace_id"; // Single Span Sampling and Analytics Events keys const KEY_SPAN_SAMPLING_MECHANISM: &str = "_dd.span_sampling.mechanism"; @@ -56,17 +58,46 @@ const KEY_ANALYZED_SPANS: &str = "_dd.analyzed"; const DECISION_MAKER_PROBABILISTIC: &str = "-9"; const DECISION_MAKER_MANUAL_PRIORITY: &str = "-4"; +// Debug tag to identify which sampler path made the decision. +const WACKTEST_SAMPLER_PATH_KEY: &str = "wacktest.sampler_path"; + +const MAX_TRACE_ID: u64 = u64::MAX; +const MAX_TRACE_ID_FLOAT: f64 = MAX_TRACE_ID as f64; +const SAMPLER_HASHER: u64 = 1111111111111111111; + +fn normalize_sampling_rate(rate: f64) -> f64 { + if rate <= 0.0 || rate >= 1.0 { + 1.0 + } else { + rate + } +} + +fn sample_by_rate(trace_id: u64, rate: f64) -> bool { + if rate < 1.0 { + trace_id.wrapping_mul(SAMPLER_HASHER) < (rate * MAX_TRACE_ID_FLOAT) as u64 + } else { + true + } +} + /// Configuration for the trace sampler transform. #[derive(Debug)] pub struct TraceSamplerConfiguration { apm_config: ApmConfig, + otlp_sampling_rate: f64, } impl TraceSamplerConfiguration { /// Creates a new `TraceSamplerConfiguration` from the given configuration. 
pub fn from_configuration(config: &GenericConfiguration) -> Result { let apm_config = ApmConfig::from_configuration(config)?; - Ok(Self { apm_config }) + let otlp_traces: TracesConfig = config.try_get_typed("otlp_config.traces")?.unwrap_or_default(); + let otlp_sampling_rate = normalize_sampling_rate(otlp_traces.probabilistic_sampler.sampling_percentage / 100.0); + Ok(Self { + apm_config, + otlp_sampling_rate, + }) } } @@ -87,6 +118,7 @@ impl TransformBuilder for TraceSamplerConfiguration { error_sampling_enabled: self.apm_config.error_sampling_enabled(), error_tracking_standalone: self.apm_config.error_tracking_standalone_enabled(), probabilistic_sampler_enabled: self.apm_config.probabilistic_sampler_enabled(), + otlp_sampling_rate: self.otlp_sampling_rate, error_sampler: errors::ErrorsSampler::new(self.apm_config.errors_per_second(), ERROR_SAMPLE_RATE), no_priority_sampler: score_sampler::NoPrioritySampler::new( self.apm_config.target_traces_per_second(), @@ -109,6 +141,7 @@ pub struct TraceSampler { error_tracking_standalone: bool, error_sampling_enabled: bool, probabilistic_sampler_enabled: bool, + otlp_sampling_rate: f64, error_sampler: errors::ErrorsSampler, no_priority_sampler: score_sampler::NoPrioritySampler, } @@ -193,6 +226,18 @@ impl TraceSampler { probabilistic::ProbabilisticSampler::sample(trace_id, self.sampling_rate) } + fn is_otlp_trace(&self, trace: &Trace, root_span_idx: usize) -> bool { + trace + .spans() + .get(root_span_idx) + .map(|span| span.meta().contains_key(&MetaString::from_static(OTEL_TRACE_ID_META_KEY))) + .unwrap_or(false) + } + + fn sample_otlp(&self, trace_id: u64) -> bool { + sample_by_rate(trace_id, self.otlp_sampling_rate) + } + /// Returns `true` if the trace contains a span with an error. 
fn trace_contains_error(&self, trace: &Trace, consider_exception_span_events: bool) -> bool { trace.spans().iter().any(|span| { @@ -269,6 +314,16 @@ impl TraceSampler { fn run_samplers(&mut self, trace: &mut Trace) -> (bool, i32, &'static str, Option) { // logic taken from: https://github.com/DataDog/datadog-agent/blob/main/pkg/trace/agent/agent.go#L1066 let now = std::time::SystemTime::now(); + println!( + "trace sampler config: probabilistic_sampler_enabled={} sampling_rate={} error_sampling_enabled={}", + self.probabilistic_sampler_enabled, self.sampling_rate, self.error_sampling_enabled + ); + info!( + probabilistic_sampler_enabled = self.probabilistic_sampler_enabled, + sampling_rate = self.sampling_rate, + error_sampling_enabled = self.error_sampling_enabled, + "trace sampler config" + ); // Empty trace check if trace.spans().is_empty() { return (false, PRIORITY_AUTO_DROP, "", None); @@ -288,6 +343,7 @@ impl TraceSampler { if self.sample_probabilistic(root_trace_id) { decision_maker = DECISION_MAKER_PROBABILISTIC; // probabilistic sampling prob_keep = true; + self.set_wacktest_sampler_path(trace, root_span_idx, "probabilistic"); if let Some(root_span) = trace.spans_mut().get_mut(root_span_idx) { let metrics = root_span.metrics_mut(); @@ -295,6 +351,9 @@ impl TraceSampler { } } else if self.error_sampling_enabled && contains_error { prob_keep = self.error_sampler.sample_error(now, trace, root_span_idx); + self.set_wacktest_sampler_path(trace, root_span_idx, "error"); + } else { + self.set_wacktest_sampler_path(trace, root_span_idx, "probabilistic_drop"); } let priority = if prob_keep { @@ -310,23 +369,45 @@ impl TraceSampler { if let Some(priority) = user_priority { if priority > 0 { // User wants to keep this trace + self.set_wacktest_sampler_path(trace, root_span_idx, "manual_priority"); return (true, priority, DECISION_MAKER_MANUAL_PRIORITY, Some(root_span_idx)); } + } else if self.is_otlp_trace(trace, root_span_idx) { + let root_trace_id = 
trace.spans()[root_span_idx].trace_id(); + if self.sample_otlp(root_trace_id) { + if let Some(root_span) = trace.spans_mut().get_mut(root_span_idx) { + root_span.metrics_mut().remove(PROB_RATE_KEY); + } + self.set_wacktest_sampler_path(trace, root_span_idx, "probabilistic"); + return (true, PRIORITY_AUTO_KEEP, DECISION_MAKER_PROBABILISTIC, Some(root_span_idx)); + } + self.set_wacktest_sampler_path(trace, root_span_idx, "probabilistic_drop"); } else if self.no_priority_sampler.sample(now, trace, root_span_idx) { + self.set_wacktest_sampler_path(trace, root_span_idx, "no_priority"); return (true, PRIORITY_AUTO_KEEP, "", Some(root_span_idx)); } if self.error_sampling_enabled && self.trace_contains_error(trace, false) { let keep = self.error_sampler.sample_error(now, trace, root_span_idx); if keep { + self.set_wacktest_sampler_path(trace, root_span_idx, "error"); return (true, PRIORITY_AUTO_KEEP, "", Some(root_span_idx)); } } // Default: drop the trace + self.set_wacktest_sampler_path(trace, root_span_idx, "drop"); (false, PRIORITY_AUTO_DROP, "", Some(root_span_idx)) } + fn set_wacktest_sampler_path(&self, trace: &mut Trace, root_span_idx: usize, value: &'static str) { + if let Some(root_span) = trace.spans_mut().get_mut(root_span_idx) { + root_span + .meta_mut() + .insert(MetaString::from_static(WACKTEST_SAMPLER_PATH_KEY), MetaString::from_static(value)); + } + } + /// Apply sampling metadata to the trace in-place. /// /// The `root_span_id` parameter identifies which span should receive the sampling metadata. 
@@ -334,6 +415,7 @@ impl TraceSampler { fn apply_sampling_metadata( &self, trace: &mut Trace, keep: bool, priority: i32, decision_maker: &str, root_span_idx: usize, ) { + let is_otlp = self.is_otlp_trace(trace, root_span_idx); let root_span_value = match trace.spans_mut().get_mut(root_span_idx) { Some(span) => span, None => return, @@ -346,6 +428,11 @@ impl TraceSampler { } // Now we can use trace again to set sampling metadata + let sampling_rate = if is_otlp { + self.otlp_sampling_rate + } else { + self.sampling_rate + }; let sampling = TraceSampling::new( !keep, Some(priority), @@ -354,7 +441,7 @@ impl TraceSampler { } else { None }, - Some(MetaString::from(format!("{:.2}", self.sampling_rate))), + Some(MetaString::from(format!("{:.2}", sampling_rate))), ); trace.set_sampling(Some(sampling)); } @@ -498,6 +585,7 @@ mod tests { error_sampling_enabled: true, error_tracking_standalone: false, probabilistic_sampler_enabled: true, + otlp_sampling_rate: 1.0, error_sampler: errors::ErrorsSampler::new(10.0, 1.0), no_priority_sampler: score_sampler::NoPrioritySampler::new(10.0, 1.0), } @@ -877,4 +965,31 @@ mod tests { ); } } + + #[test] + fn test_otlp_legacy_sampling_skips_prob_rate_metric() { + let mut sampler = create_test_sampler(); + sampler.probabilistic_sampler_enabled = false; + sampler.otlp_sampling_rate = 1.0; + + let mut meta = HashMap::new(); + meta.insert(OTEL_TRACE_ID_META_KEY.to_string(), "otel-trace-id".to_string()); + let span = create_test_span_with_meta(12345, 1, meta); + let mut trace = create_test_trace(vec![span]); + + let (keep, priority, decision_maker, root_span_idx) = sampler.run_samplers(&mut trace); + assert!(keep); + assert_eq!(priority, PRIORITY_AUTO_KEEP); + assert_eq!(decision_maker, DECISION_MAKER_PROBABILISTIC); + + let root_span_idx = root_span_idx.expect("root span index should be present"); + sampler.apply_sampling_metadata(&mut trace, keep, priority, decision_maker, root_span_idx); + + let root_span = &trace.spans()[root_span_idx]; + 
assert_eq!( + root_span.meta().get(TAG_DECISION_MAKER).unwrap(), + &MetaString::from(DECISION_MAKER_PROBABILISTIC) + ); + assert!(!root_span.metrics().contains_key(PROB_RATE_KEY)); + } } From 0e7373a60eb24987571ec98595a98ef127a86b5b Mon Sep 17 00:00:00 2001 From: andrewqian2001datadog Date: Wed, 28 Jan 2026 10:26:26 -0500 Subject: [PATCH 44/53] remove testing code --- .../src/transforms/trace_sampler/mod.rs | 32 +------------------ 1 file changed, 1 insertion(+), 31 deletions(-) diff --git a/lib/saluki-components/src/transforms/trace_sampler/mod.rs b/lib/saluki-components/src/transforms/trace_sampler/mod.rs index 1252ec2ffe..a855925c8a 100644 --- a/lib/saluki-components/src/transforms/trace_sampler/mod.rs +++ b/lib/saluki-components/src/transforms/trace_sampler/mod.rs @@ -27,7 +27,7 @@ use saluki_core::{ use saluki_error::GenericError; use stringtheory::MetaString; use tokio::select; -use tracing::{debug, info}; +use tracing::debug; mod core_sampler; mod errors; @@ -58,8 +58,6 @@ const KEY_ANALYZED_SPANS: &str = "_dd.analyzed"; const DECISION_MAKER_PROBABILISTIC: &str = "-9"; const DECISION_MAKER_MANUAL_PRIORITY: &str = "-4"; -// Debug tag to identify which sampler path made the decision. 
-const WACKTEST_SAMPLER_PATH_KEY: &str = "wacktest.sampler_path"; const MAX_TRACE_ID: u64 = u64::MAX; const MAX_TRACE_ID_FLOAT: f64 = MAX_TRACE_ID as f64; @@ -314,16 +312,6 @@ impl TraceSampler { fn run_samplers(&mut self, trace: &mut Trace) -> (bool, i32, &'static str, Option) { // logic taken from: https://github.com/DataDog/datadog-agent/blob/main/pkg/trace/agent/agent.go#L1066 let now = std::time::SystemTime::now(); - println!( - "trace sampler config: probabilistic_sampler_enabled={} sampling_rate={} error_sampling_enabled={}", - self.probabilistic_sampler_enabled, self.sampling_rate, self.error_sampling_enabled - ); - info!( - probabilistic_sampler_enabled = self.probabilistic_sampler_enabled, - sampling_rate = self.sampling_rate, - error_sampling_enabled = self.error_sampling_enabled, - "trace sampler config" - ); // Empty trace check if trace.spans().is_empty() { return (false, PRIORITY_AUTO_DROP, "", None); @@ -343,7 +331,6 @@ impl TraceSampler { if self.sample_probabilistic(root_trace_id) { decision_maker = DECISION_MAKER_PROBABILISTIC; // probabilistic sampling prob_keep = true; - self.set_wacktest_sampler_path(trace, root_span_idx, "probabilistic"); if let Some(root_span) = trace.spans_mut().get_mut(root_span_idx) { let metrics = root_span.metrics_mut(); @@ -351,9 +338,6 @@ impl TraceSampler { } } else if self.error_sampling_enabled && contains_error { prob_keep = self.error_sampler.sample_error(now, trace, root_span_idx); - self.set_wacktest_sampler_path(trace, root_span_idx, "error"); - } else { - self.set_wacktest_sampler_path(trace, root_span_idx, "probabilistic_drop"); } let priority = if prob_keep { @@ -369,7 +353,6 @@ impl TraceSampler { if let Some(priority) = user_priority { if priority > 0 { // User wants to keep this trace - self.set_wacktest_sampler_path(trace, root_span_idx, "manual_priority"); return (true, priority, DECISION_MAKER_MANUAL_PRIORITY, Some(root_span_idx)); } } else if self.is_otlp_trace(trace, root_span_idx) { @@ -378,36 
+361,23 @@ impl TraceSampler { if let Some(root_span) = trace.spans_mut().get_mut(root_span_idx) { root_span.metrics_mut().remove(PROB_RATE_KEY); } - self.set_wacktest_sampler_path(trace, root_span_idx, "probabilistic"); return (true, PRIORITY_AUTO_KEEP, DECISION_MAKER_PROBABILISTIC, Some(root_span_idx)); } - self.set_wacktest_sampler_path(trace, root_span_idx, "probabilistic_drop"); } else if self.no_priority_sampler.sample(now, trace, root_span_idx) { - self.set_wacktest_sampler_path(trace, root_span_idx, "no_priority"); return (true, PRIORITY_AUTO_KEEP, "", Some(root_span_idx)); } if self.error_sampling_enabled && self.trace_contains_error(trace, false) { let keep = self.error_sampler.sample_error(now, trace, root_span_idx); if keep { - self.set_wacktest_sampler_path(trace, root_span_idx, "error"); return (true, PRIORITY_AUTO_KEEP, "", Some(root_span_idx)); } } // Default: drop the trace - self.set_wacktest_sampler_path(trace, root_span_idx, "drop"); (false, PRIORITY_AUTO_DROP, "", Some(root_span_idx)) } - fn set_wacktest_sampler_path(&self, trace: &mut Trace, root_span_idx: usize, value: &'static str) { - if let Some(root_span) = trace.spans_mut().get_mut(root_span_idx) { - root_span - .meta_mut() - .insert(MetaString::from_static(WACKTEST_SAMPLER_PATH_KEY), MetaString::from_static(value)); - } - } - /// Apply sampling metadata to the trace in-place. /// /// The `root_span_id` parameter identifies which span should receive the sampling metadata. 
From e76278f7ac33fbfd17ddcae8218210b7249b6045 Mon Sep 17 00:00:00 2001 From: andrewqian2001datadog Date: Wed, 28 Jan 2026 10:46:48 -0500 Subject: [PATCH 45/53] make fmt --- .../src/transforms/trace_sampler/mod.rs | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/lib/saluki-components/src/transforms/trace_sampler/mod.rs b/lib/saluki-components/src/transforms/trace_sampler/mod.rs index a855925c8a..488b019c5a 100644 --- a/lib/saluki-components/src/transforms/trace_sampler/mod.rs +++ b/lib/saluki-components/src/transforms/trace_sampler/mod.rs @@ -58,7 +58,6 @@ const KEY_ANALYZED_SPANS: &str = "_dd.analyzed"; const DECISION_MAKER_PROBABILISTIC: &str = "-9"; const DECISION_MAKER_MANUAL_PRIORITY: &str = "-4"; - const MAX_TRACE_ID: u64 = u64::MAX; const MAX_TRACE_ID_FLOAT: f64 = MAX_TRACE_ID as f64; const SAMPLER_HASHER: u64 = 1111111111111111111; @@ -228,7 +227,10 @@ impl TraceSampler { trace .spans() .get(root_span_idx) - .map(|span| span.meta().contains_key(&MetaString::from_static(OTEL_TRACE_ID_META_KEY))) + .map(|span| { + span.meta() + .contains_key(&MetaString::from_static(OTEL_TRACE_ID_META_KEY)) + }) .unwrap_or(false) } @@ -361,7 +363,12 @@ impl TraceSampler { if let Some(root_span) = trace.spans_mut().get_mut(root_span_idx) { root_span.metrics_mut().remove(PROB_RATE_KEY); } - return (true, PRIORITY_AUTO_KEEP, DECISION_MAKER_PROBABILISTIC, Some(root_span_idx)); + return ( + true, + PRIORITY_AUTO_KEEP, + DECISION_MAKER_PROBABILISTIC, + Some(root_span_idx), + ); } } else if self.no_priority_sampler.sample(now, trace, root_span_idx) { return (true, PRIORITY_AUTO_KEEP, "", Some(root_span_idx)); From 0ee5cf291bfbea43b4c75e9dc99720d1265cc654 Mon Sep 17 00:00:00 2001 From: andrewqian2001datadog Date: Wed, 28 Jan 2026 13:23:05 -0500 Subject: [PATCH 46/53] move constants to shared file --- .../src/common/datadog/mod.rs | 18 ++++++++++++++++++ .../src/common/otlp/traces/transform.rs | 3 +-- .../src/encoders/datadog/traces/mod.rs | 9 ++------- 
.../src/transforms/trace_sampler/mod.rs | 14 ++++---------- .../transforms/trace_sampler/score_sampler.rs | 7 +------ 5 files changed, 26 insertions(+), 25 deletions(-) diff --git a/lib/saluki-components/src/common/datadog/mod.rs b/lib/saluki-components/src/common/datadog/mod.rs index 64c0f81ba3..f1ebaec654 100644 --- a/lib/saluki-components/src/common/datadog/mod.rs +++ b/lib/saluki-components/src/common/datadog/mod.rs @@ -18,3 +18,21 @@ pub const DEFAULT_INTAKE_COMPRESSED_SIZE_LIMIT: usize = 3_200_000; // 3 MiB /// Default uncompressed size limit for intake requests. pub const DEFAULT_INTAKE_UNCOMPRESSED_SIZE_LIMIT: usize = 62_914_560; // 60 MiB + +/// Metadata tag used to store the sampling decision maker (`_dd.p.dm`). +pub const TAG_DECISION_MAKER: &str = "_dd.p.dm"; + +/// Decision maker value for probabilistic sampling (matches Datadog Agent). +pub const DECISION_MAKER_PROBABILISTIC: &str = "-9"; + +/// Metadata key used to store the OTEL trace id. +pub const OTEL_TRACE_ID_META_KEY: &str = "otel.trace_id"; + +/// Maximum trace id used for deterministic sampling. +pub const MAX_TRACE_ID: u64 = u64::MAX; + +/// Precomputed float form of `MAX_TRACE_ID`. +pub const MAX_TRACE_ID_FLOAT: f64 = MAX_TRACE_ID as f64; + +/// Hasher used for deterministic sampling. 
+pub const SAMPLER_HASHER: u64 = 1111111111111111111; diff --git a/lib/saluki-components/src/common/otlp/traces/transform.rs b/lib/saluki-components/src/common/otlp/traces/transform.rs index 224253fe2b..887b5a3cb7 100644 --- a/lib/saluki-components/src/common/otlp/traces/transform.rs +++ b/lib/saluki-components/src/common/otlp/traces/transform.rs @@ -20,7 +20,7 @@ use serde_json::{Map as JsonMap, Value as JsonValue}; use stringtheory::MetaString; use tracing::error; -use crate::common::datadog::SAMPLING_PRIORITY_METRIC_KEY; +use crate::common::datadog::{OTEL_TRACE_ID_META_KEY, SAMPLING_PRIORITY_METRIC_KEY}; use crate::common::otlp::attributes::{get_int_attribute, HTTP_MAPPINGS}; use crate::common::otlp::traces::normalize::{normalize_service, normalize_tag_value}; use crate::common::otlp::traces::normalize::{truncate_utf8, MAX_RESOURCE_LEN}; @@ -71,7 +71,6 @@ const NETWORK_PROTOCOL_NAME_KEY: &str = "network.protocol.name"; const HTTP_STATUS_CODE_KEY: &str = "http.status_code"; const HTTP_RESPONSE_STATUS_CODE_KEY: &str = "http.response.status_code"; const SPAN_KIND_META_KEY: &str = "span.kind"; -const OTEL_TRACE_ID_META_KEY: &str = "otel.trace_id"; const W3C_TRACESTATE_META_KEY: &str = "w3c.tracestate"; const OTEL_LIBRARY_NAME_META_KEY: &str = "otel.library.name"; const OTEL_LIBRARY_VERSION_META_KEY: &str = "otel.library.version"; diff --git a/lib/saluki-components/src/encoders/datadog/traces/mod.rs b/lib/saluki-components/src/encoders/datadog/traces/mod.rs index 0e7541767c..f1a59e0aa9 100644 --- a/lib/saluki-components/src/encoders/datadog/traces/mod.rs +++ b/lib/saluki-components/src/encoders/datadog/traces/mod.rs @@ -50,7 +50,8 @@ use crate::common::datadog::{ io::RB_BUFFER_CHUNK_SIZE, request_builder::{EndpointEncoder, RequestBuilder}, telemetry::ComponentTelemetry, - DEFAULT_INTAKE_COMPRESSED_SIZE_LIMIT, DEFAULT_INTAKE_UNCOMPRESSED_SIZE_LIMIT, + DECISION_MAKER_PROBABILISTIC, DEFAULT_INTAKE_COMPRESSED_SIZE_LIMIT, DEFAULT_INTAKE_UNCOMPRESSED_SIZE_LIMIT, + 
MAX_TRACE_ID_FLOAT, OTEL_TRACE_ID_META_KEY, SAMPLER_HASHER, TAG_DECISION_MAKER, }; use crate::common::otlp::config::TracesConfig; use crate::common::otlp::util::{ @@ -65,13 +66,7 @@ static CONTENT_TYPE_PROTOBUF: HeaderValue = HeaderValue::from_static("applicatio // Sampling metadata keys / values. const TAG_OTLP_SAMPLING_RATE: &str = "_dd.otlp_sr"; -const TAG_DECISION_MAKER: &str = "_dd.p.dm"; -const DECISION_MAKER_PROBABILISTIC: &str = "-9"; const DEFAULT_CHUNK_PRIORITY: i32 = 1; // PRIORITY_AUTO_KEEP -const OTEL_TRACE_ID_META_KEY: &str = "otel.trace_id"; -const MAX_TRACE_ID: u64 = u64::MAX; -const MAX_TRACE_ID_FLOAT: f64 = MAX_TRACE_ID as f64; -const SAMPLER_HASHER: u64 = 1111111111111111111; fn default_serializer_compressor_kind() -> String { "zstd".to_string() diff --git a/lib/saluki-components/src/transforms/trace_sampler/mod.rs b/lib/saluki-components/src/transforms/trace_sampler/mod.rs index 488b019c5a..d1c2827990 100644 --- a/lib/saluki-components/src/transforms/trace_sampler/mod.rs +++ b/lib/saluki-components/src/transforms/trace_sampler/mod.rs @@ -36,7 +36,10 @@ mod score_sampler; mod signature; use self::probabilistic::PROB_RATE_KEY; -use crate::common::datadog::{apm::ApmConfig, SAMPLING_PRIORITY_METRIC_KEY}; +use crate::common::datadog::{ + apm::ApmConfig, DECISION_MAKER_PROBABILISTIC, MAX_TRACE_ID_FLOAT, OTEL_TRACE_ID_META_KEY, SAMPLER_HASHER, + SAMPLING_PRIORITY_METRIC_KEY, TAG_DECISION_MAKER, +}; use crate::common::otlp::config::TracesConfig; // Sampling priority constants (matching datadog-agent) @@ -46,22 +49,13 @@ const PRIORITY_USER_KEEP: i32 = 2; const ERROR_SAMPLE_RATE: f64 = 1.0; // Default extra sample rate (matches agent's ExtraSampleRate) -// Sampling metadata keys / values (matching datadog-agent where applicable). 
-const TAG_DECISION_MAKER: &str = "_dd.p.dm"; -const OTEL_TRACE_ID_META_KEY: &str = "otel.trace_id"; - // Single Span Sampling and Analytics Events keys const KEY_SPAN_SAMPLING_MECHANISM: &str = "_dd.span_sampling.mechanism"; const KEY_ANALYZED_SPANS: &str = "_dd.analyzed"; // Decision maker values for `_dd.p.dm` (matching datadog-agent). -const DECISION_MAKER_PROBABILISTIC: &str = "-9"; const DECISION_MAKER_MANUAL_PRIORITY: &str = "-4"; -const MAX_TRACE_ID: u64 = u64::MAX; -const MAX_TRACE_ID_FLOAT: f64 = MAX_TRACE_ID as f64; -const SAMPLER_HASHER: u64 = 1111111111111111111; - fn normalize_sampling_rate(rate: f64) -> f64 { if rate <= 0.0 || rate >= 1.0 { 1.0 diff --git a/lib/saluki-components/src/transforms/trace_sampler/score_sampler.rs b/lib/saluki-components/src/transforms/trace_sampler/score_sampler.rs index 7d44803a85..1a0bd8137c 100644 --- a/lib/saluki-components/src/transforms/trace_sampler/score_sampler.rs +++ b/lib/saluki-components/src/transforms/trace_sampler/score_sampler.rs @@ -5,6 +5,7 @@ use saluki_core::data_model::event::trace::{Span, Trace}; use stringtheory::MetaString; use super::signature::{compute_signature_with_root_and_env, Signature}; +use crate::common::datadog::{MAX_TRACE_ID_FLOAT, SAMPLER_HASHER}; use crate::transforms::trace_sampler::core_sampler::Sampler; // Metric keys for sampling rates @@ -18,12 +19,6 @@ pub(super) const NO_PRIORITY_RATE_KEY: &str = "_dd.no_p_sr"; // shrinkCardinality is the max Signature cardinality before shrinking const SHRINK_CARDINALITY: usize = 200; -// Constants for deterministic sampling -const MAX_TRACE_ID: u64 = u64::MAX; -const MAX_TRACE_ID_FLOAT: f64 = MAX_TRACE_ID as f64; -// Using a prime number for better distribution -const SAMPLER_HASHER: u64 = 1111111111111111111; - /// ScoreSampler for traces /// /// ScoreSampler samples pieces of traces by computing a signature based on spans (service, name, rsc, http.status, error.type) From 61401fb50e190b78f58a0d5f97434704b269c842 Mon Sep 17 00:00:00 2001 
From: andrewqian2001datadog Date: Wed, 28 Jan 2026 13:26:48 -0500 Subject: [PATCH 47/53] remove test covered by correctness test --- .../src/transforms/trace_sampler/mod.rs | 27 ------------------- 1 file changed, 27 deletions(-) diff --git a/lib/saluki-components/src/transforms/trace_sampler/mod.rs b/lib/saluki-components/src/transforms/trace_sampler/mod.rs index d1c2827990..a2b5221963 100644 --- a/lib/saluki-components/src/transforms/trace_sampler/mod.rs +++ b/lib/saluki-components/src/transforms/trace_sampler/mod.rs @@ -936,31 +936,4 @@ mod tests { ); } } - - #[test] - fn test_otlp_legacy_sampling_skips_prob_rate_metric() { - let mut sampler = create_test_sampler(); - sampler.probabilistic_sampler_enabled = false; - sampler.otlp_sampling_rate = 1.0; - - let mut meta = HashMap::new(); - meta.insert(OTEL_TRACE_ID_META_KEY.to_string(), "otel-trace-id".to_string()); - let span = create_test_span_with_meta(12345, 1, meta); - let mut trace = create_test_trace(vec![span]); - - let (keep, priority, decision_maker, root_span_idx) = sampler.run_samplers(&mut trace); - assert!(keep); - assert_eq!(priority, PRIORITY_AUTO_KEEP); - assert_eq!(decision_maker, DECISION_MAKER_PROBABILISTIC); - - let root_span_idx = root_span_idx.expect("root span index should be present"); - sampler.apply_sampling_metadata(&mut trace, keep, priority, decision_maker, root_span_idx); - - let root_span = &trace.spans()[root_span_idx]; - assert_eq!( - root_span.meta().get(TAG_DECISION_MAKER).unwrap(), - &MetaString::from(DECISION_MAKER_PROBABILISTIC) - ); - assert!(!root_span.metrics().contains_key(PROB_RATE_KEY)); - } } From b3d485635a83e589814595cab5088b213d51f8b1 Mon Sep 17 00:00:00 2001 From: andrewqian2001datadog Date: Wed, 28 Jan 2026 13:55:46 -0500 Subject: [PATCH 48/53] add comment --- lib/saluki-components/src/encoders/datadog/traces/mod.rs | 2 ++ 1 file changed, 2 insertions(+) diff --git a/lib/saluki-components/src/encoders/datadog/traces/mod.rs 
b/lib/saluki-components/src/encoders/datadog/traces/mod.rs index f1a59e0aa9..4e51cf8ac8 100644 --- a/lib/saluki-components/src/encoders/datadog/traces/mod.rs +++ b/lib/saluki-components/src/encoders/datadog/traces/mod.rs @@ -552,7 +552,9 @@ impl TraceEndpointEncoder { rate } + // helper function for adding tag fn should_add_otlp_decision_maker(&self, trace: &Trace, sampling: &TraceSampling) -> bool { + // logic taken from here: https://github.com/DataDog/datadog-agent/blob/main/pkg/trace/api/otlp.go#L556-L579 if self.apm_config.probabilistic_sampler_enabled() { return false; } From 8b28ef490e38ad830443c09afa894486f733fe49 Mon Sep 17 00:00:00 2001 From: andrewqian2001datadog Date: Wed, 28 Jan 2026 14:02:15 -0500 Subject: [PATCH 49/53] move duplicate code to shared folder --- lib/saluki-components/src/common/datadog/mod.rs | 11 +++++++++++ .../src/encoders/datadog/traces/mod.rs | 11 ++--------- .../src/transforms/trace_sampler/mod.rs | 10 +--------- .../src/transforms/trace_sampler/score_sampler.rs | 12 +----------- 4 files changed, 15 insertions(+), 29 deletions(-) diff --git a/lib/saluki-components/src/common/datadog/mod.rs b/lib/saluki-components/src/common/datadog/mod.rs index f1ebaec654..04d6a5d697 100644 --- a/lib/saluki-components/src/common/datadog/mod.rs +++ b/lib/saluki-components/src/common/datadog/mod.rs @@ -36,3 +36,14 @@ pub const MAX_TRACE_ID_FLOAT: f64 = MAX_TRACE_ID as f64; /// Hasher used for deterministic sampling. pub const SAMPLER_HASHER: u64 = 1111111111111111111; + +/// Returns whether to keep a trace, based on its ID and a sampling rate. +/// +/// This assumes trace IDs are nearly uniformly distributed. 
+pub fn sample_by_rate(trace_id: u64, rate: f64) -> bool { + if rate < 1.0 { + trace_id.wrapping_mul(SAMPLER_HASHER) < (rate * MAX_TRACE_ID_FLOAT) as u64 + } else { + true + } +} diff --git a/lib/saluki-components/src/encoders/datadog/traces/mod.rs b/lib/saluki-components/src/encoders/datadog/traces/mod.rs index 4e51cf8ac8..1f28089f7b 100644 --- a/lib/saluki-components/src/encoders/datadog/traces/mod.rs +++ b/lib/saluki-components/src/encoders/datadog/traces/mod.rs @@ -51,7 +51,8 @@ use crate::common::datadog::{ request_builder::{EndpointEncoder, RequestBuilder}, telemetry::ComponentTelemetry, DECISION_MAKER_PROBABILISTIC, DEFAULT_INTAKE_COMPRESSED_SIZE_LIMIT, DEFAULT_INTAKE_UNCOMPRESSED_SIZE_LIMIT, - MAX_TRACE_ID_FLOAT, OTEL_TRACE_ID_META_KEY, SAMPLER_HASHER, TAG_DECISION_MAKER, + OTEL_TRACE_ID_META_KEY, TAG_DECISION_MAKER, + sample_by_rate, }; use crate::common::otlp::config::TracesConfig; use crate::common::otlp::util::{ @@ -661,14 +662,6 @@ fn trace_has_otel_trace_id(trace: &Trace) -> bool { .contains_key(&MetaString::from_static(OTEL_TRACE_ID_META_KEY)) } -fn sample_by_rate(trace_id: u64, rate: f64) -> bool { - if rate < 1.0 { - trace_id.wrapping_mul(SAMPLER_HASHER) < (rate * MAX_TRACE_ID_FLOAT) as u64 - } else { - true - } -} - fn convert_span(span: &DdSpan) -> ProtoSpan { let mut proto = ProtoSpan::new(); proto.set_service(span.service().to_string()); diff --git a/lib/saluki-components/src/transforms/trace_sampler/mod.rs b/lib/saluki-components/src/transforms/trace_sampler/mod.rs index a2b5221963..cfb06688b1 100644 --- a/lib/saluki-components/src/transforms/trace_sampler/mod.rs +++ b/lib/saluki-components/src/transforms/trace_sampler/mod.rs @@ -37,7 +37,7 @@ mod signature; use self::probabilistic::PROB_RATE_KEY; use crate::common::datadog::{ - apm::ApmConfig, DECISION_MAKER_PROBABILISTIC, MAX_TRACE_ID_FLOAT, OTEL_TRACE_ID_META_KEY, SAMPLER_HASHER, + apm::ApmConfig, sample_by_rate, DECISION_MAKER_PROBABILISTIC, OTEL_TRACE_ID_META_KEY, 
SAMPLING_PRIORITY_METRIC_KEY, TAG_DECISION_MAKER, }; use crate::common::otlp::config::TracesConfig; @@ -64,14 +64,6 @@ fn normalize_sampling_rate(rate: f64) -> f64 { } } -fn sample_by_rate(trace_id: u64, rate: f64) -> bool { - if rate < 1.0 { - trace_id.wrapping_mul(SAMPLER_HASHER) < (rate * MAX_TRACE_ID_FLOAT) as u64 - } else { - true - } -} - /// Configuration for the trace sampler transform. #[derive(Debug)] pub struct TraceSamplerConfiguration { diff --git a/lib/saluki-components/src/transforms/trace_sampler/score_sampler.rs b/lib/saluki-components/src/transforms/trace_sampler/score_sampler.rs index 1a0bd8137c..c9b1a1bec0 100644 --- a/lib/saluki-components/src/transforms/trace_sampler/score_sampler.rs +++ b/lib/saluki-components/src/transforms/trace_sampler/score_sampler.rs @@ -5,7 +5,7 @@ use saluki_core::data_model::event::trace::{Span, Trace}; use stringtheory::MetaString; use super::signature::{compute_signature_with_root_and_env, Signature}; -use crate::common::datadog::{MAX_TRACE_ID_FLOAT, SAMPLER_HASHER}; +use crate::common::datadog::sample_by_rate; use crate::transforms::trace_sampler::core_sampler::Sampler; // Metric keys for sampling rates @@ -190,13 +190,3 @@ fn get_global_rate(span: &Span) -> f64 { span.metrics().get(KEY_SAMPLING_RATE_GLOBAL).copied().unwrap_or(1.0) } -/// SampleByRate returns whether to keep a trace, based on its ID and a sampling rate. -/// This assumes that trace IDs are nearly uniformly distributed. 
-fn sample_by_rate(trace_id: u64, rate: f64) -> bool { - // logic taken from here: https://github.com/DataDog/datadog-agent/blob/angel/support-tail-beginning-wildcard/pkg/trace/sampler/sampler.go#L94 - if rate < 1.0 { - trace_id.wrapping_mul(SAMPLER_HASHER) < (rate * MAX_TRACE_ID_FLOAT) as u64 - } else { - true - } -} From 664c3990c2b902e0baa5f80a56235aa96f520834 Mon Sep 17 00:00:00 2001 From: andrewqian2001datadog Date: Wed, 28 Jan 2026 14:04:33 -0500 Subject: [PATCH 50/53] make fmt --- lib/saluki-components/src/encoders/datadog/traces/mod.rs | 2 +- lib/saluki-components/src/transforms/trace_sampler/mod.rs | 4 ++-- .../src/transforms/trace_sampler/score_sampler.rs | 1 - 3 files changed, 3 insertions(+), 4 deletions(-) diff --git a/lib/saluki-components/src/encoders/datadog/traces/mod.rs b/lib/saluki-components/src/encoders/datadog/traces/mod.rs index 1f28089f7b..976787407a 100644 --- a/lib/saluki-components/src/encoders/datadog/traces/mod.rs +++ b/lib/saluki-components/src/encoders/datadog/traces/mod.rs @@ -49,10 +49,10 @@ use crate::common::datadog::{ apm::ApmConfig, io::RB_BUFFER_CHUNK_SIZE, request_builder::{EndpointEncoder, RequestBuilder}, + sample_by_rate, telemetry::ComponentTelemetry, DECISION_MAKER_PROBABILISTIC, DEFAULT_INTAKE_COMPRESSED_SIZE_LIMIT, DEFAULT_INTAKE_UNCOMPRESSED_SIZE_LIMIT, OTEL_TRACE_ID_META_KEY, TAG_DECISION_MAKER, - sample_by_rate, }; use crate::common::otlp::config::TracesConfig; use crate::common::otlp::util::{ diff --git a/lib/saluki-components/src/transforms/trace_sampler/mod.rs b/lib/saluki-components/src/transforms/trace_sampler/mod.rs index cfb06688b1..e6782bacd9 100644 --- a/lib/saluki-components/src/transforms/trace_sampler/mod.rs +++ b/lib/saluki-components/src/transforms/trace_sampler/mod.rs @@ -37,8 +37,8 @@ mod signature; use self::probabilistic::PROB_RATE_KEY; use crate::common::datadog::{ - apm::ApmConfig, sample_by_rate, DECISION_MAKER_PROBABILISTIC, OTEL_TRACE_ID_META_KEY, - SAMPLING_PRIORITY_METRIC_KEY, 
TAG_DECISION_MAKER, + apm::ApmConfig, sample_by_rate, DECISION_MAKER_PROBABILISTIC, OTEL_TRACE_ID_META_KEY, SAMPLING_PRIORITY_METRIC_KEY, + TAG_DECISION_MAKER, }; use crate::common::otlp::config::TracesConfig; diff --git a/lib/saluki-components/src/transforms/trace_sampler/score_sampler.rs b/lib/saluki-components/src/transforms/trace_sampler/score_sampler.rs index c9b1a1bec0..f84dcbdd7d 100644 --- a/lib/saluki-components/src/transforms/trace_sampler/score_sampler.rs +++ b/lib/saluki-components/src/transforms/trace_sampler/score_sampler.rs @@ -189,4 +189,3 @@ fn weight_root(span: &Span) -> f32 { fn get_global_rate(span: &Span) -> f64 { span.metrics().get(KEY_SAMPLING_RATE_GLOBAL).copied().unwrap_or(1.0) } - From 7b9a38f99ae38a5216827f7c872e91f8f861bbe3 Mon Sep 17 00:00:00 2001 From: andrewqian2001datadog Date: Wed, 28 Jan 2026 14:24:12 -0500 Subject: [PATCH 51/53] add comment --- lib/saluki-components/src/transforms/trace_sampler/mod.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/lib/saluki-components/src/transforms/trace_sampler/mod.rs b/lib/saluki-components/src/transforms/trace_sampler/mod.rs index e6782bacd9..9900508b8d 100644 --- a/lib/saluki-components/src/transforms/trace_sampler/mod.rs +++ b/lib/saluki-components/src/transforms/trace_sampler/mod.rs @@ -344,6 +344,7 @@ impl TraceSampler { return (true, priority, DECISION_MAKER_MANUAL_PRIORITY, Some(root_span_idx)); } } else if self.is_otlp_trace(trace, root_span_idx) { + // some sampling happens upstream in the otlp receiver in the agent: https://github.com/DataDog/datadog-agent/blob/main/pkg/trace/api/otlp.go#L572 let root_trace_id = trace.spans()[root_span_idx].trace_id(); if self.sample_otlp(root_trace_id) { if let Some(root_span) = trace.spans_mut().get_mut(root_span_idx) { From 57c52084a46202bc6577e428a1c8fea717791839 Mon Sep 17 00:00:00 2001 From: andrewqian2001datadog Date: Wed, 28 Jan 2026 14:28:07 -0500 Subject: [PATCH 52/53] remove unecessary code --- 
lib/saluki-components/src/transforms/trace_sampler/mod.rs | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/lib/saluki-components/src/transforms/trace_sampler/mod.rs b/lib/saluki-components/src/transforms/trace_sampler/mod.rs index 9900508b8d..4e7d1edc7f 100644 --- a/lib/saluki-components/src/transforms/trace_sampler/mod.rs +++ b/lib/saluki-components/src/transforms/trace_sampler/mod.rs @@ -220,10 +220,6 @@ impl TraceSampler { .unwrap_or(false) } - fn sample_otlp(&self, trace_id: u64) -> bool { - sample_by_rate(trace_id, self.otlp_sampling_rate) - } - /// Returns `true` if the trace contains a span with an error. fn trace_contains_error(&self, trace: &Trace, consider_exception_span_events: bool) -> bool { trace.spans().iter().any(|span| { @@ -346,7 +342,7 @@ impl TraceSampler { } else if self.is_otlp_trace(trace, root_span_idx) { // some sampling happens upstream in the otlp receiver in the agent: https://github.com/DataDog/datadog-agent/blob/main/pkg/trace/api/otlp.go#L572 let root_trace_id = trace.spans()[root_span_idx].trace_id(); - if self.sample_otlp(root_trace_id) { + if sample_by_rate(root_trace_id, self.otlp_sampling_rate) { if let Some(root_span) = trace.spans_mut().get_mut(root_span_idx) { root_span.metrics_mut().remove(PROB_RATE_KEY); } From e2794aaa51afcfd26da598358bc34634031e6b8e Mon Sep 17 00:00:00 2001 From: andrewqian2001datadog Date: Wed, 28 Jan 2026 14:40:04 -0500 Subject: [PATCH 53/53] remove potentially??? 
unecessary code --- .../src/encoders/datadog/traces/mod.rs | 41 +------------------ 1 file changed, 2 insertions(+), 39 deletions(-) diff --git a/lib/saluki-components/src/encoders/datadog/traces/mod.rs b/lib/saluki-components/src/encoders/datadog/traces/mod.rs index 976787407a..bb94769bf5 100644 --- a/lib/saluki-components/src/encoders/datadog/traces/mod.rs +++ b/lib/saluki-components/src/encoders/datadog/traces/mod.rs @@ -19,7 +19,6 @@ use saluki_config::GenericConfiguration; use saluki_context::tags::TagSet; use saluki_core::data_model::event::trace::{ AttributeScalarValue, AttributeValue, Span as DdSpan, SpanEvent as DdSpanEvent, SpanLink as DdSpanLink, - TraceSampling, }; use saluki_core::topology::{EventsBuffer, PayloadsBuffer}; use saluki_core::{ @@ -49,10 +48,8 @@ use crate::common::datadog::{ apm::ApmConfig, io::RB_BUFFER_CHUNK_SIZE, request_builder::{EndpointEncoder, RequestBuilder}, - sample_by_rate, telemetry::ComponentTelemetry, - DECISION_MAKER_PROBABILISTIC, DEFAULT_INTAKE_COMPRESSED_SIZE_LIMIT, DEFAULT_INTAKE_UNCOMPRESSED_SIZE_LIMIT, - OTEL_TRACE_ID_META_KEY, TAG_DECISION_MAKER, + DEFAULT_INTAKE_COMPRESSED_SIZE_LIMIT, DEFAULT_INTAKE_UNCOMPRESSED_SIZE_LIMIT, TAG_DECISION_MAKER, }; use crate::common::otlp::config::TracesConfig; use crate::common::otlp::util::{ @@ -553,29 +550,6 @@ impl TraceEndpointEncoder { rate } - // helper function for adding tag - fn should_add_otlp_decision_maker(&self, trace: &Trace, sampling: &TraceSampling) -> bool { - // logic taken from here: https://github.com/DataDog/datadog-agent/blob/main/pkg/trace/api/otlp.go#L556-L579 - if self.apm_config.probabilistic_sampler_enabled() { - return false; - } - if sampling.decision_maker.is_some() { - return false; - } - let priority = sampling.priority.unwrap_or(DEFAULT_CHUNK_PRIORITY); - if sampling.dropped_trace || priority <= 0 { - return false; - } - if !trace_has_otel_trace_id(trace) { - return false; - } - let trace_id = match trace.spans().first() { - Some(span) => 
span.trace_id(), - None => return false, - }; - sample_by_rate(trace_id, self.sampling_rate()) - } - fn build_trace_chunk(&self, trace: &Trace) -> TraceChunk { let spans: Vec = trace.spans().iter().map(convert_span).collect(); let mut chunk = TraceChunk::new(); @@ -589,12 +563,9 @@ impl TraceEndpointEncoder { chunk.set_priority(sampling.priority.unwrap_or(DEFAULT_CHUNK_PRIORITY)); chunk.set_droppedTrace(sampling.dropped_trace); - // Set decision maker tag if present, otherwise align OTLP behavior with trace-agent's - // OTLPReceiver when probabilistic sampling is disabled. + // Set decision maker tag if present. if let Some(dm) = &sampling.decision_maker { tags.insert(TAG_DECISION_MAKER.to_string(), dm.to_string()); - } else if self.should_add_otlp_decision_maker(trace, sampling) { - tags.insert(TAG_DECISION_MAKER.to_string(), DECISION_MAKER_PROBABILISTIC.to_string()); } // Set OTLP sampling rate tag if present (from sampler) @@ -654,14 +625,6 @@ impl EndpointEncoder for TraceEndpointEncoder { } } -fn trace_has_otel_trace_id(trace: &Trace) -> bool { - let Some(span) = trace.spans().first() else { - return false; - }; - span.meta() - .contains_key(&MetaString::from_static(OTEL_TRACE_ID_META_KEY)) -} - fn convert_span(span: &DdSpan) -> ProtoSpan { let mut proto = ProtoSpan::new(); proto.set_service(span.service().to_string());