Skip to content

Commit cf3377a

Browse files
authored
Merge pull request #71 from DataDog/benjamin/update-sampler
Update sampler
2 parents 9af1c94 + c3dbb72 commit cf3377a

File tree

5 files changed

+57
-53
lines changed

5 files changed

+57
-53
lines changed

ddtrace/compat.py

Lines changed: 2 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -21,12 +21,9 @@
2121

2222

2323
try:
24-
import ujson as json
24+
import simplejson as json
2525
except ImportError:
26-
try:
27-
import simplejson as json
28-
except ImportError:
29-
import json
26+
import json
3027

3128
def iteritems(obj, **kwargs):
3229
func = getattr(obj, "iteritems", None)

ddtrace/sampler.py

Lines changed: 9 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -2,15 +2,17 @@
22
33
Any `sampled = False` trace won't be written, and can be ignored by the instrumentation.
44
"""
5-
65
import logging
76
import array
87
import threading
98

10-
from .span import MAX_TRACE_ID
11-
129
log = logging.getLogger(__name__)
1310

11+
MAX_TRACE_ID = 2 ** 64
12+
13+
# Has to be the same factor and key as the Agent to allow chained sampling
14+
KNUTH_FACTOR = 1111111111111111111
15+
SAMPLE_RATE_METRIC_KEY = "_sample_rate"
1416

1517
class AllSampler(object):
1618
"""Sampler sampling all the traces"""
@@ -42,17 +44,17 @@ def set_sample_rate(self, sample_rate):
4244
self.sampling_id_threshold = sample_rate * MAX_TRACE_ID
4345

4446
def sample(self, span):
45-
span.sampled = span.trace_id <= self.sampling_id_threshold
46-
# `weight` is an attribute applied to all spans to help scaling related statistics
47-
span.weight = 1 / (self.sample_rate or 1)
48-
47+
span.sampled = ((span.trace_id * KNUTH_FACTOR) % MAX_TRACE_ID) <= self.sampling_id_threshold
48+
span.set_metric(SAMPLE_RATE_METRIC_KEY, self.sample_rate)
4949

5050
class ThroughputSampler(object):
5151
""" Sampler applying a strict limit over the trace volume.
5252
5353
Stop tracing once more than `tps` traces per second have been recorded.
5454
Computation is based on a circular buffer over the last
5555
`BUFFER_DURATION` with a `BUFFER_SIZE` size.
56+
57+
DEPRECATED: Outdated implementation.
5658
"""
5759

5860
# Reasonable values

ddtrace/span.py

Lines changed: 2 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,6 @@ class Span(object):
3030
'duration',
3131
# Sampler attributes
3232
'sampled',
33-
'weight',
3433
# Internal attributes
3534
'_tracer',
3635
'_finished',
@@ -89,7 +88,6 @@ def __init__(
8988

9089
# sampling
9190
self.sampled = True
92-
self.weight = 1
9391

9492
self._tracer = tracer
9593
self._parent = None
@@ -185,7 +183,6 @@ def to_dict(self):
185183
'resource' : self.resource,
186184
'name' : self.name,
187185
'error': self.error,
188-
'weight': self.weight,
189186
}
190187

191188
if self.start:
@@ -277,8 +274,6 @@ def __repr__(self):
277274
self.name,
278275
)
279276

280-
MAX_TRACE_ID = 2 ** 63
281277
def _new_id():
282-
"""Generate a random trace_id"""
283-
return random.getrandbits(63)
284-
278+
"""Generate a random trace_id or span_id"""
279+
return random.getrandbits(64)

docs/index.rst

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -96,6 +96,24 @@ API
9696

9797
.. _integrations:
9898

99+
100+
Sampling
101+
--------
102+
103+
It is possible to sample traces with `ddtrace`.
104+
While the Trace Agent already samples traces to reduce bandwidth usage, client-side sampling also
105+
reduces performance overhead.
106+
107+
`RateSampler` samples a ratio of the traces. Its usage is simple::
108+
109+
from ddtrace.sampler import RateSampler
110+
111+
# Sample rate ranges from 0 (nothing sampled) to 1 (everything sampled).
112+
# Sample 50% of the traces.
113+
sample_rate = 0.5
114+
tracer.sampler = RateSampler(sample_rate)
115+
116+
99117
Integrations
100118
------------
101119

tests/test_sampler.py

Lines changed: 26 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -6,47 +6,39 @@
66
import threading
77

88
from ddtrace.tracer import Tracer
9-
from ddtrace.sampler import RateSampler, ThroughputSampler
9+
from ddtrace.sampler import RateSampler, ThroughputSampler, SAMPLE_RATE_METRIC_KEY
1010
from .test_tracer import DummyWriter
1111
from .util import patch_time
1212

1313

1414
class RateSamplerTest(unittest.TestCase):
1515

16-
def test_random_sequence(self):
16+
def test_sample_rate_deviation(self):
1717
writer = DummyWriter()
18-
tracer = Tracer()
19-
tracer.writer = writer
20-
tracer.sampler = RateSampler(0.5)
21-
22-
# Set the seed so that the choice of sampled traces
23-
# is deterministic, then write tests accordingly
24-
random.seed(4012)
25-
26-
# First trace, sampled
27-
with tracer.trace("foo") as s:
28-
assert s.sampled
29-
assert s.weight == 2
30-
assert writer.pop()
31-
32-
# Second trace, not sampled
33-
with tracer.trace("figh") as s:
34-
assert not s.sampled
35-
s2 = tracer.trace("what")
36-
assert not s2.sampled
37-
s2.finish()
38-
with tracer.trace("ever") as s3:
39-
assert not s3.sampled
40-
s4 = tracer.trace("!")
41-
assert not s4.sampled
42-
s4.finish()
43-
spans = writer.pop()
44-
assert not spans, spans
45-
46-
# Third trace, not sampled
47-
with tracer.trace("ters") as s:
48-
assert s.sampled
49-
assert writer.pop()
18+
19+
for sample_rate in [0.1, 0.25, 0.5, 1]:
20+
tracer = Tracer()
21+
tracer.writer = writer
22+
23+
sample_rate = 0.5
24+
tracer.sampler = RateSampler(sample_rate)
25+
26+
random.seed(1234)
27+
28+
iterations = int(2e4)
29+
30+
for i in range(iterations):
31+
span = tracer.trace(i)
32+
span.finish()
33+
34+
samples = writer.pop()
35+
36+
# We must have at least 1 sample, check that it has its sample rate properly assigned
37+
assert samples[0].get_metric(SAMPLE_RATE_METRIC_KEY) == 0.5
38+
39+
# Less than 1% deviation when "enough" iterations (arbitrary, just check if it converges)
40+
deviation = abs(len(samples) - (iterations * sample_rate)) / (iterations * sample_rate)
41+
assert deviation < 0.01, "Deviation too high %f with sample_rate %f" % (deviation, sample_rate)
5042

5143

5244
class ThroughputSamplerTest(unittest.TestCase):

0 commit comments

Comments
 (0)