Skip to content

Commit 536e28d

Browse files
committed
Update RateSampler to be compatible with the new logic
1 parent d75de50 commit 536e28d

File tree

2 files changed

+34
-35
lines changed

2 files changed

+34
-35
lines changed

ddtrace/sampler.py

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,10 @@
1010

1111
MAX_TRACE_ID = 2 ** 64
1212

13+
# Has to be the same factor and key as the Agent to allow chained sampling
14+
KNUTH_FACTOR = 1111111111111111111
15+
SAMPLE_RATE_METRIC_KEY = "_sample_rate"
16+
1317
class AllSampler(object):
1418
"""Sampler sampling all the traces"""
1519

@@ -40,15 +44,17 @@ def set_sample_rate(self, sample_rate):
4044
self.sampling_id_threshold = sample_rate * MAX_TRACE_ID
4145

4246
def sample(self, span):
    """Flag `span` as sampled or not, and record the sample rate on it.

    The trace id is multiplied by `KNUTH_FACTOR` and reduced modulo
    `MAX_TRACE_ID`; the span is marked as sampled when that value does not
    exceed `self.sampling_id_threshold`. The sampler's `sample_rate` is then
    stored on the span under `SAMPLE_RATE_METRIC_KEY`.
    """
    hashed_trace_id = (span.trace_id * KNUTH_FACTOR) % MAX_TRACE_ID
    span.sampled = hashed_trace_id <= self.sampling_id_threshold
    span.set_metric(SAMPLE_RATE_METRIC_KEY, self.sample_rate)
4549

4650
class ThroughputSampler(object):
4751
""" Sampler applying a strict limit over the trace volume.
4852
4953
Stops tracing once more than `tps` traces per second have been reached.
5054
Computation is based on a circular buffer over the last
5155
`BUFFER_DURATION` with a `BUFFER_SIZE` size.
56+
57+
DEPRECATED: Outdated implementation.
5258
"""
5359

5460
# Reasonable values

tests/test_sampler.py

Lines changed: 26 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -6,46 +6,39 @@
66
import threading
77

88
from ddtrace.tracer import Tracer
9-
from ddtrace.sampler import RateSampler, ThroughputSampler
9+
from ddtrace.sampler import RateSampler, ThroughputSampler, SAMPLE_RATE_METRIC_KEY
1010
from .test_tracer import DummyWriter
1111
from .util import patch_time
1212

1313

1414
class RateSamplerTest(unittest.TestCase):
1515

16-
def test_random_sequence(self):
16+
def test_sample_rate_deviation(self):
    """Check that RateSampler keeps roughly `sample_rate` of the traces.

    For every rate under test, a fresh tracer is attached to the shared
    DummyWriter, a batch of single-span traces is generated, and the number
    of spans popped from the writer is compared with the expected count.
    The first popped span must also carry the rate under test in its
    `SAMPLE_RATE_METRIC_KEY` metric.
    """
    writer = DummyWriter()

    # Every rate is expected to keep this many spans, so the relative
    # statistical noise is comparable across rates.
    expected_samples = int(1e4)

    for sample_rate in [0.1, 0.25, 0.5, 1]:
        tracer = Tracer()
        tracer.writer = writer
        tracer.sampler = RateSampler(sample_rate)

        # Seed the RNG so that the choice of sampled traces is repeatable
        # (presumably trace ids are drawn from `random` -- confirm).
        random.seed(1234)

        iterations = int(round(expected_samples / sample_rate))

        for _ in range(iterations):
            span = tracer.trace("sampled.operation")
            span.finish()

        samples = writer.pop()

        # We must have at least 1 sample; check that it has the sample rate
        # of the sampler under test properly assigned.
        assert samples[0].get_metric(SAMPLE_RATE_METRIC_KEY) == sample_rate

        # The kept count is binomial: with ~1e4 expected samples its relative
        # standard deviation is below 1% for every rate here, so 3% leaves
        # roughly three standard deviations of margin while still checking
        # that the observed rate converges.
        deviation = abs(len(samples) - (iterations * sample_rate)) / (iterations * sample_rate)
        assert deviation < 0.03, "Deviation too high %f with sample_rate %f" % (deviation, sample_rate)
4942

5043

5144
class ThroughputSamplerTest(unittest.TestCase):

0 commit comments

Comments
 (0)