Skip to content

Commit ebd685e

Browse files
authored
internal: refactor DatadogSampler default sampling rule and agent sampler
Right now, DatadogSampler.default_sampler is responsible for being either a default SamplingRule or the older RateByServiceSampler. Rather than having a single attribute that tries to represent both of these paths, separating the two makes the types easier to juggle and the flow of the sampler easier to reason about.
1 parent c41f144 commit ebd685e

File tree

3 files changed

+186
-39
lines changed

3 files changed

+186
-39
lines changed

ddtrace/sampler.py

Lines changed: 79 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@
2727
from .internal.rate_limiter import RateLimiter
2828
from .internal.utils.cache import cachedmethod
2929
from .internal.utils.formats import get_env
30+
from .vendor.debtcollector.removals import removed_property
3031

3132

3233
try:
@@ -47,6 +48,10 @@
4748
KNUTH_FACTOR = 1111111111111111111
4849

4950

51+
class SamplingError(Exception):
52+
pass
53+
54+
5055
class BaseSampler(six.with_metaclass(abc.ABCMeta)):
5156
@abc.abstractmethod
5257
def sample(self, span):
@@ -153,7 +158,37 @@ def update_rate_by_service_sample_rates(self, rate_by_service):
153158

154159

155160
class DatadogSampler(BasePrioritySampler):
156-
__slots__ = ("default_sampler", "limiter", "rules")
161+
"""
162+
Default sampler used by Tracer for determining if a trace should be kept or dropped.
163+
164+
By default this sampler will rely on dynamic sample rates provided by the trace agent
165+
to determine which traces are kept or dropped.
166+
167+
You can also configure a static sample rate via ``default_sample_rate`` to use for sampling.
168+
When a ``default_sample_rate`` is configured, that is the only sample rate used; the agent
169+
provided rates are ignored.
170+
171+
You may also supply a list of ``SamplingRule`` to determine sample rates for specific
172+
services or operation names.
173+
174+
Example rules::
175+
176+
DatadogSampler(rules=[
177+
SamplingRule(sample_rate=1.0, service="my-svc"),
178+
SamplingRule(sample_rate=0.0, service="less-important"),
179+
])
180+
181+
Rules are evaluated in the order they are provided, and the first rule that matches is used.
182+
If no rule matches, then the agent sample rates are used.
183+
184+
185+
Lastly, this sampler can be configured with a rate limit. This will ensure the max number of
186+
sampled traces per second does not exceed the supplied limit. The default is 100 traces kept
187+
per second. This rate limiter is only used when ``default_sample_rate`` or ``rules`` are
188+
provided. It is not used when the agent supplied sample rates are used.
189+
"""
190+
191+
__slots__ = ("_agent_sampler", "limiter", "rules")
157192

158193
NO_RATE_LIMIT = -1
159194
DEFAULT_RATE_LIMIT = 100
@@ -186,6 +221,7 @@ def __init__(
186221
rate_limit = int(get_env("trace", "rate_limit", default=self.DEFAULT_RATE_LIMIT)) # type: ignore[arg-type]
187222

188223
# Ensure rules is a list
224+
self.rules = [] # type: List[SamplingRule]
189225
if rules is None:
190226
env_sampling_rules = get_env("trace", "sampling_rules")
191227
if env_sampling_rules:
@@ -198,21 +234,30 @@ def __init__(
198234
if not isinstance(rule, SamplingRule):
199235
raise TypeError("Rule {!r} must be a sub-class of type ddtrace.sampler.SamplingRules".format(rule))
200236
self.rules = rules
237+
# DEV: Default sampling rule must come last
238+
if default_sample_rate is not None:
239+
self.rules.append(SamplingRule(sample_rate=default_sample_rate))
240+
241+
# If no rules are configured (or match), fall back to agent-based sampling
242+
self._agent_sampler = RateByServiceSampler()
201243

202244
# Configure rate limiter
203245
self.limiter = RateLimiter(rate_limit)
204246

205-
if default_sample_rate is None:
206-
log.debug("initialized DatadogSampler, limit %r traces per second", rate_limit)
207-
# Default to previous default behavior of RateByServiceSampler
208-
self.default_sampler = RateByServiceSampler() # type: BaseSampler
209-
else:
210-
log.debug(
211-
"initialized DatadogSampler, sample %s%% traces, limit %r traces per second",
212-
100 * default_sample_rate,
213-
rate_limit,
214-
)
215-
self.default_sampler = SamplingRule(sample_rate=default_sample_rate)
247+
log.debug("initialized %r", self)
248+
249+
@removed_property(removal_version="1.0.0")
250+
def default_sampler(self):
251+
if self.rules:
252+
return self.rules[-1]
253+
return self._agent_sampler
254+
255+
def __str__(self):
256+
return "{}(agent_sampler={!r}, limiter={!r}, rules={!r})".format(
257+
self.__class__.__name__, self._agent_sampler, self.limiter, self.rules
258+
)
259+
260+
__repr__ = __str__
216261

217262
def _parse_rules_from_env_variable(self, rules):
218263
sampling_rules = []
@@ -238,8 +283,7 @@ def _parse_rules_from_env_variable(self, rules):
238283
def update_rate_by_service_sample_rates(self, sample_rates):
239284
# type: (Dict[str, float]) -> None
240285
# Pass through the call to our RateByServiceSampler
241-
if isinstance(self.default_sampler, RateByServiceSampler):
242-
self.default_sampler.update_rate_by_service_sample_rates(sample_rates)
286+
self._agent_sampler.update_rate_by_service_sample_rates(sample_rates)
243287

244288
def _set_priority(self, span, priority):
245289
# type: (Span, int) -> None
@@ -259,29 +303,29 @@ def sample(self, span):
259303
:rtype: :obj:`bool`
260304
"""
261305
# If there are rules defined, then iterate through them and find one that wants to sample
262-
matching_rule = None # type: Optional[BaseSampler]
306+
matching_rule = None # type: Optional[SamplingRule]
307+
263308
# Go through all rules and grab the first one that matched
264309
# DEV: This means rules should be ordered by the user from most specific to least specific
265310
for rule in self.rules:
266311
if rule.matches(span):
267312
matching_rule = rule
268313
break
269314
else:
270-
# If this is the old sampler, sample and return
271-
if isinstance(self.default_sampler, RateByServiceSampler):
272-
if self.default_sampler.sample(span):
273-
self._set_priority(span, AUTO_KEEP)
274-
return True
275-
else:
276-
self._set_priority(span, AUTO_REJECT)
277-
return False
278-
279-
# If no rules match, use our default sampler
280-
matching_rule = self.default_sampler
315+
# No rule matched, so use agent-based sampling
316+
if self._agent_sampler.sample(span):
317+
self._set_priority(span, AUTO_KEEP)
318+
return True
319+
else:
320+
self._set_priority(span, AUTO_REJECT)
321+
return False
322+
323+
# DEV: This should never happen, but since the type is Optional we have to check
324+
if not matching_rule:
325+
raise SamplingError("No sampling rule found for span {!r} from {!r}".format(span, self))
281326

282327
# Sample with the matching sampling rule
283-
if isinstance(matching_rule, (RateSampler, SamplingRule)):
284-
span.set_metric(SAMPLING_RULE_DECISION, matching_rule.sample_rate)
328+
span.set_metric(SAMPLING_RULE_DECISION, matching_rule.sample_rate)
285329
if not matching_rule.sample(span):
286330
self._set_priority(span, USER_REJECT)
287331
return False
@@ -451,3 +495,10 @@ def __repr__(self):
451495
)
452496

453497
__str__ = __repr__
498+
499+
def __eq__(self, other):
500+
# type: (Any) -> bool
501+
if not isinstance(other, SamplingRule):
502+
raise TypeError("Cannot compare SamplingRule to {}".format(type(other)))
503+
504+
return self.sample_rate == other.sample_rate and self.service == other.service and self.name == other.name
Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
---
2+
deprecations:
3+
- |
4+
The ``ddtrace.sampler.DatadogSampler.default_sampler`` property is deprecated and will be removed in 1.0.

tests/tracer/test_sampler.py

Lines changed: 103 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -263,6 +263,94 @@ def test_sampling_rule_init():
263263
assert rule.name == name_regex
264264

265265

266+
@pytest.mark.parametrize(
267+
"rule_1,rule_2,expected",
268+
[
269+
# Sample rate only
270+
(SamplingRule(sample_rate=1.0), SamplingRule(sample_rate=1.0), True),
271+
(SamplingRule(sample_rate=0.5), SamplingRule(sample_rate=0.5), True),
272+
(SamplingRule(sample_rate=0.0), SamplingRule(sample_rate=0.0), True),
273+
(SamplingRule(sample_rate=0.5), SamplingRule(sample_rate=1.0), False),
274+
# Sample rate, and service name
275+
(SamplingRule(sample_rate=1.0, service="my-svc"), SamplingRule(sample_rate=1.0, service="my-svc"), True),
276+
(
277+
SamplingRule(sample_rate=1.0, service=re.compile("my-svc")),
278+
SamplingRule(sample_rate=1.0, service=re.compile("my-svc")),
279+
True,
280+
),
281+
(SamplingRule(sample_rate=1.0, service="my-svc"), SamplingRule(sample_rate=1.0, service="other-svc"), False),
282+
(SamplingRule(sample_rate=1.0, service="my-svc"), SamplingRule(sample_rate=0.5, service="my-svc"), False),
283+
(
284+
SamplingRule(sample_rate=1.0, service=re.compile("my-svc")),
285+
SamplingRule(sample_rate=0.5, service=re.compile("my-svc")),
286+
False,
287+
),
288+
(
289+
SamplingRule(sample_rate=1.0, service=re.compile("my-svc")),
290+
SamplingRule(sample_rate=1.0, service=re.compile("other")),
291+
False,
292+
),
293+
# Sample rate, and operation name
294+
(
295+
SamplingRule(sample_rate=1.0, name="span.name"),
296+
SamplingRule(sample_rate=1.0, name="span.name"),
297+
True,
298+
),
299+
(
300+
SamplingRule(sample_rate=1.0, name=re.compile("span.name")),
301+
SamplingRule(sample_rate=1.0, name=re.compile("span.name")),
302+
True,
303+
),
304+
(
305+
SamplingRule(sample_rate=1.0, name=re.compile("span.name")),
306+
SamplingRule(sample_rate=1.0, name=re.compile("span.other")),
307+
False,
308+
),
309+
(
310+
SamplingRule(sample_rate=1.0, name="span.name"),
311+
SamplingRule(sample_rate=0.5, name="span.name"),
312+
False,
313+
),
314+
(SamplingRule(sample_rate=1.0, name="span.name"), SamplingRule(sample_rate=1.0, name="span.other"), False),
315+
(SamplingRule(sample_rate=1.0, name="span.name"), SamplingRule(sample_rate=0.5, name="span.name"), False),
316+
# Sample rate, service, and operation name
317+
(
318+
SamplingRule(sample_rate=1.0, service="my-svc", name="span.name"),
319+
SamplingRule(sample_rate=1.0, service="my-svc", name="span.name"),
320+
True,
321+
),
322+
(
323+
SamplingRule(sample_rate=1.0, service="my-svc", name=re.compile("span.name")),
324+
SamplingRule(sample_rate=1.0, service="my-svc", name=re.compile("span.name")),
325+
True,
326+
),
327+
(
328+
SamplingRule(sample_rate=1.0, service=re.compile("my-svc"), name=re.compile("span.name")),
329+
SamplingRule(sample_rate=1.0, service=re.compile("my-svc"), name=re.compile("span.name")),
330+
True,
331+
),
332+
(
333+
SamplingRule(sample_rate=1.0, service="my-svc", name="span.name"),
334+
SamplingRule(sample_rate=0.5, service="my-svc", name="span.name"),
335+
False,
336+
),
337+
(
338+
SamplingRule(sample_rate=1.0, service="my-svc", name="span.name"),
339+
SamplingRule(sample_rate=1.0, service="other", name="span.name"),
340+
False,
341+
),
342+
(
343+
SamplingRule(sample_rate=1.0, service="my-svc", name="span.name"),
344+
SamplingRule(sample_rate=1.0, service="my-svc", name="span.other"),
345+
False,
346+
),
347+
],
348+
)
349+
def test_sampling_rule_eq(rule_1, rule_2, expected):
350+
result = rule_1 == rule_2
351+
assert result == expected
352+
353+
266354
def test_sampling_rule_init_via_env():
267355
# Testing single sampling rule
268356
with override_env(dict(DD_TRACE_SAMPLING_RULES='[{"sample_rate":1.0,"service":"xyz","name":"abc"}]')):
@@ -534,39 +622,39 @@ def test_datadog_sampler_init():
534622
assert sampler.rules == []
535623
assert isinstance(sampler.limiter, RateLimiter)
536624
assert sampler.limiter.rate_limit == DatadogSampler.DEFAULT_RATE_LIMIT
537-
assert isinstance(sampler.default_sampler, RateByServiceSampler)
625+
assert isinstance(sampler._agent_sampler, RateByServiceSampler)
538626

539627
# With rules
540628
rule = SamplingRule(sample_rate=1)
541629
sampler = DatadogSampler(rules=[rule])
542630
assert sampler.rules == [rule]
543631
assert sampler.limiter.rate_limit == DatadogSampler.DEFAULT_RATE_LIMIT
544-
assert isinstance(sampler.default_sampler, RateByServiceSampler)
632+
assert isinstance(sampler._agent_sampler, RateByServiceSampler)
545633

546634
# With rate limit
547635
sampler = DatadogSampler(rate_limit=10)
548636
assert sampler.limiter.rate_limit == 10
549-
assert isinstance(sampler.default_sampler, RateByServiceSampler)
637+
assert isinstance(sampler._agent_sampler, RateByServiceSampler)
550638

551639
# With default_sample_rate
552640
sampler = DatadogSampler(default_sample_rate=0.5)
553641
assert sampler.limiter.rate_limit == DatadogSampler.DEFAULT_RATE_LIMIT
554-
assert isinstance(sampler.default_sampler, SamplingRule)
555-
assert sampler.default_sampler.sample_rate == 0.5
642+
assert sampler.rules == [SamplingRule(sample_rate=0.5)]
643+
assert isinstance(sampler._agent_sampler, RateByServiceSampler)
556644

557645
# From env variables
558646
with override_env(dict(DD_TRACE_SAMPLE_RATE="0.5", DD_TRACE_RATE_LIMIT="10")):
559647
sampler = DatadogSampler()
560648
assert sampler.limiter.rate_limit == 10
561-
assert isinstance(sampler.default_sampler, SamplingRule)
562-
assert sampler.default_sampler.sample_rate == 0.5
649+
assert sampler.rules == [SamplingRule(sample_rate=0.5)]
650+
assert isinstance(sampler._agent_sampler, RateByServiceSampler)
563651

564652
# DD_TRACE_SAMPLE_RATE=0
565653
with override_env(dict(DD_TRACE_SAMPLE_RATE="0")):
566654
sampler = DatadogSampler()
567655
assert sampler.limiter.rate_limit == DatadogSampler.DEFAULT_RATE_LIMIT
568-
assert isinstance(sampler.default_sampler, SamplingRule)
569-
assert sampler.default_sampler.sample_rate == 0
656+
assert sampler.rules == [SamplingRule(sample_rate=0)]
657+
assert isinstance(sampler._agent_sampler, RateByServiceSampler)
570658

571659
# Invalid env vars
572660
with override_env(dict(DD_TRACE_SAMPLE_RATE="asdf")):
@@ -590,6 +678,10 @@ def test_datadog_sampler_init():
590678
sampler = DatadogSampler(rules=[rule_1, rule_2, rule_3])
591679
assert sampler.rules == [rule_1, rule_2, rule_3]
592680

681+
# Ensure default rule is appended
682+
sampler = DatadogSampler(rules=[rule_1, rule_2, rule_3], default_sample_rate=0.75)
683+
assert sampler.rules == [rule_1, rule_2, rule_3, SamplingRule(sample_rate=0.75)]
684+
593685

594686
@mock.patch("ddtrace.sampler.RateByServiceSampler.sample")
595687
def test_datadog_sampler_sample_no_rules(mock_sample, dummy_tracer):
@@ -838,7 +930,7 @@ def test_datadog_sampler_update_rate_by_service_sample_rates(dummy_tracer):
838930
for case in cases:
839931
sampler.update_rate_by_service_sample_rates(case)
840932
rates = {}
841-
for k, v in iteritems(sampler.default_sampler._by_service_samplers):
933+
for k, v in iteritems(sampler._agent_sampler._by_service_samplers):
842934
rates[k] = v.sample_rate
843935
assert case == rates, "%s != %s" % (case, rates)
844936

@@ -848,6 +940,6 @@ def test_datadog_sampler_update_rate_by_service_sample_rates(dummy_tracer):
848940
for case in cases:
849941
sampler.update_rate_by_service_sample_rates(case)
850942
rates = {}
851-
for k, v in iteritems(sampler.default_sampler._by_service_samplers):
943+
for k, v in iteritems(sampler._agent_sampler._by_service_samplers):
852944
rates[k] = v.sample_rate
853945
assert case == rates, "%s != %s" % (case, rates)

0 commit comments

Comments
 (0)