Skip to content

Commit fc8fa9f

Browse files
feat(tracing): Port sample_rand to POTel (#4106)
Port `sample_rand` to `potel-base`. See [spec](https://develop.sentry.dev/sdk/telemetry/traces/#propagated-random-value).

There are now two places where a `sample_rand` might be generated:

- If we're explicitly propagating with `continue_trace`, we'll [backfill](https://github.com/getsentry/sentry-python/pull/4106/files#diff-7c64294459f5053c93d44e0e33e4e73ffcef0adefcd77ba91f4031aa461a8c42R396-R397) `sample_rand` on the propagation context like on master, either using the incoming one or generating a new one from the incoming `sampled`/`sample_rate`.
- Otherwise, we generate a new `sample_rand` [in the Sampler](https://github.com/getsentry/sentry-python/pull/4106/files#diff-59aa7195d955e153b5cdd730f888994996a72eaf5e9ea174335ce961841584a9R194-R213).

The generated `sample_rand` is then saved on the trace state.

This change fixes most of the failures in the Common test suite.

Closes #4027

---------

Co-authored-by: Daniel Szoke <[email protected]>
1 parent 2983854 commit fc8fa9f

File tree

17 files changed

+408
-121
lines changed

17 files changed

+408
-121
lines changed

MIGRATION_GUIDE.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -157,6 +157,7 @@ Looking to upgrade from Sentry SDK 2.x to 3.x? Here's a comprehensive list of wh
157157
- `profiles_sample_rate` and `profiler_mode` were removed from options available via `_experiments`. Use the top-level `profiles_sample_rate` and `profiler_mode` options instead.
158158
- `Transport.capture_event` has been removed. Use `Transport.capture_envelope` instead.
159159
- Function transports are no longer supported. Subclass the `Transport` instead.
160+
- `start_transaction` (`start_span`) no longer takes a `baggage` argument. Use the `continue_trace()` context manager instead to propagate baggage.
160161

161162
### Deprecated
162163

sentry_sdk/integrations/opentelemetry/consts.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,9 +12,12 @@
1212
SENTRY_USE_CURRENT_SCOPE_KEY = create_key("sentry_use_current_scope")
1313
SENTRY_USE_ISOLATION_SCOPE_KEY = create_key("sentry_use_isolation_scope")
1414

15+
# trace state keys
1516
TRACESTATE_SAMPLED_KEY = Baggage.SENTRY_PREFIX + "sampled"
1617
TRACESTATE_SAMPLE_RATE_KEY = Baggage.SENTRY_PREFIX + "sample_rate"
18+
TRACESTATE_SAMPLE_RAND_KEY = Baggage.SENTRY_PREFIX + "sample_rand"
1719

20+
# misc
1821
OTEL_SENTRY_CONTEXT = "otel"
1922
SPAN_ORIGIN = "auto.otel"
2023

sentry_sdk/integrations/opentelemetry/sampler.py

Lines changed: 99 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -1,15 +1,19 @@
1-
import random
1+
from decimal import Decimal
22
from typing import cast
33

44
from opentelemetry import trace
55
from opentelemetry.sdk.trace.sampling import Sampler, SamplingResult, Decision
66
from opentelemetry.trace.span import TraceState
77

88
import sentry_sdk
9-
from sentry_sdk.tracing_utils import has_tracing_enabled
9+
from sentry_sdk.tracing_utils import (
10+
_generate_sample_rand,
11+
has_tracing_enabled,
12+
)
1013
from sentry_sdk.utils import is_valid_sample_rate, logger
1114
from sentry_sdk.integrations.opentelemetry.consts import (
1215
TRACESTATE_SAMPLED_KEY,
16+
TRACESTATE_SAMPLE_RAND_KEY,
1317
TRACESTATE_SAMPLE_RATE_KEY,
1418
SentrySpanAttribute,
1519
)
@@ -70,23 +74,40 @@ def get_parent_sample_rate(parent_context, trace_id):
7074
return None
7175

7276

73-
def dropped_result(parent_span_context, attributes, sample_rate=None):
74-
# type: (SpanContext, Attributes, Optional[float]) -> SamplingResult
75-
# these will only be added the first time in a root span sampling decision
76-
# if sample_rate is provided, it'll be updated in trace state
77-
trace_state = parent_span_context.trace_state
77+
def get_parent_sample_rand(parent_context, trace_id):
78+
# type: (Optional[SpanContext], int) -> Optional[Decimal]
79+
if parent_context is None:
80+
return None
7881

79-
if TRACESTATE_SAMPLED_KEY not in trace_state:
80-
trace_state = trace_state.add(TRACESTATE_SAMPLED_KEY, "false")
81-
elif trace_state.get(TRACESTATE_SAMPLED_KEY) == "deferred":
82-
trace_state = trace_state.update(TRACESTATE_SAMPLED_KEY, "false")
82+
is_span_context_valid = parent_context is not None and parent_context.is_valid
8383

84-
if sample_rate is not None:
85-
trace_state = trace_state.update(TRACESTATE_SAMPLE_RATE_KEY, str(sample_rate))
84+
if is_span_context_valid and parent_context.trace_id == trace_id:
85+
parent_sample_rand = parent_context.trace_state.get(TRACESTATE_SAMPLE_RAND_KEY)
86+
if parent_sample_rand is None:
87+
return None
8688

87-
is_root_span = not (
88-
parent_span_context.is_valid and not parent_span_context.is_remote
89+
return Decimal(parent_sample_rand)
90+
91+
return None
92+
93+
94+
def dropped_result(span_context, attributes, sample_rate=None, sample_rand=None):
95+
# type: (SpanContext, Attributes, Optional[float], Optional[Decimal]) -> SamplingResult
96+
"""
97+
React to a span getting unsampled and return a DROP SamplingResult.
98+
99+
Update the trace_state with the effective sampled, sample_rate and sample_rand,
100+
record that we dropped the event for client report purposes, and return
101+
an OTel SamplingResult with Decision.DROP.
102+
103+
See for more info about OTel sampling:
104+
https://opentelemetry-python.readthedocs.io/en/latest/sdk/trace.sampling.html
105+
"""
106+
trace_state = _update_trace_state(
107+
span_context, sampled=False, sample_rate=sample_rate, sample_rand=sample_rand
89108
)
109+
110+
is_root_span = not (span_context.is_valid and not span_context.is_remote)
90111
if is_root_span:
91112
# Tell Sentry why we dropped the transaction/root-span
92113
client = sentry_sdk.get_client()
@@ -108,19 +129,20 @@ def dropped_result(parent_span_context, attributes, sample_rate=None):
108129
)
109130

110131

111-
def sampled_result(span_context, attributes, sample_rate):
112-
# type: (SpanContext, Attributes, Optional[float]) -> SamplingResult
113-
# these will only be added the first time in a root span sampling decision
114-
# if sample_rate is provided, it'll be updated in trace state
115-
trace_state = span_context.trace_state
132+
def sampled_result(span_context, attributes, sample_rate=None, sample_rand=None):
133+
# type: (SpanContext, Attributes, Optional[float], Optional[Decimal]) -> SamplingResult
134+
"""
135+
React to a span being sampled and return a sampled SamplingResult.
116136
117-
if TRACESTATE_SAMPLED_KEY not in trace_state:
118-
trace_state = trace_state.add(TRACESTATE_SAMPLED_KEY, "true")
119-
elif trace_state.get(TRACESTATE_SAMPLED_KEY) == "deferred":
120-
trace_state = trace_state.update(TRACESTATE_SAMPLED_KEY, "true")
137+
Update the trace_state with the effective sampled, sample_rate and sample_rand,
138+
and return an OTel SamplingResult with Decision.RECORD_AND_SAMPLE.
121139
122-
if sample_rate is not None:
123-
trace_state = trace_state.update(TRACESTATE_SAMPLE_RATE_KEY, str(sample_rate))
140+
See for more info about OTel sampling:
141+
https://opentelemetry-python.readthedocs.io/en/latest/sdk/trace.sampling.html
142+
"""
143+
trace_state = _update_trace_state(
144+
span_context, sampled=True, sample_rate=sample_rate, sample_rand=sample_rand
145+
)
124146

125147
return SamplingResult(
126148
Decision.RECORD_AND_SAMPLE,
@@ -129,6 +151,27 @@ def sampled_result(span_context, attributes, sample_rate):
129151
)
130152

131153

154+
def _update_trace_state(span_context, sampled, sample_rate=None, sample_rand=None):
155+
# type: (SpanContext, bool, Optional[float], Optional[Decimal]) -> TraceState
156+
trace_state = span_context.trace_state
157+
158+
sampled = "true" if sampled else "false"
159+
if TRACESTATE_SAMPLED_KEY not in trace_state:
160+
trace_state = trace_state.add(TRACESTATE_SAMPLED_KEY, sampled)
161+
elif trace_state.get(TRACESTATE_SAMPLED_KEY) == "deferred":
162+
trace_state = trace_state.update(TRACESTATE_SAMPLED_KEY, sampled)
163+
164+
if sample_rate is not None:
165+
trace_state = trace_state.update(TRACESTATE_SAMPLE_RATE_KEY, str(sample_rate))
166+
167+
if sample_rand is not None:
168+
trace_state = trace_state.update(
169+
TRACESTATE_SAMPLE_RAND_KEY, f"{sample_rand:.6f}" # noqa: E231
170+
)
171+
172+
return trace_state
173+
174+
132175
class SentrySampler(Sampler):
133176
def should_sample(
134177
self,
@@ -156,6 +199,18 @@ def should_sample(
156199

157200
sample_rate = None
158201

202+
parent_sampled = get_parent_sampled(parent_span_context, trace_id)
203+
parent_sample_rate = get_parent_sample_rate(parent_span_context, trace_id)
204+
parent_sample_rand = get_parent_sample_rand(parent_span_context, trace_id)
205+
206+
if parent_sample_rand is not None:
207+
# We have a sample_rand on the incoming trace or we already backfilled
208+
# it in PropagationContext
209+
sample_rand = parent_sample_rand
210+
else:
211+
# We are the head SDK and we need to generate a new sample_rand
212+
sample_rand = cast(Decimal, _generate_sample_rand(str(trace_id), (0, 1)))
213+
159214
# Explicit sampled value provided at start_span
160215
custom_sampled = cast(
161216
"Optional[bool]", attributes.get(SentrySpanAttribute.CUSTOM_SAMPLED)
@@ -165,11 +220,17 @@ def should_sample(
165220
sample_rate = float(custom_sampled)
166221
if sample_rate > 0:
167222
return sampled_result(
168-
parent_span_context, attributes, sample_rate=sample_rate
223+
parent_span_context,
224+
attributes,
225+
sample_rate=sample_rate,
226+
sample_rand=sample_rand,
169227
)
170228
else:
171229
return dropped_result(
172-
parent_span_context, attributes, sample_rate=sample_rate
230+
parent_span_context,
231+
attributes,
232+
sample_rate=sample_rate,
233+
sample_rand=sample_rand,
173234
)
174235
else:
175236
logger.debug(
@@ -190,8 +251,6 @@ def should_sample(
190251
sample_rate_to_propagate = sample_rate
191252
else:
192253
# Check if there is a parent with a sampling decision
193-
parent_sampled = get_parent_sampled(parent_span_context, trace_id)
194-
parent_sample_rate = get_parent_sample_rate(parent_span_context, trace_id)
195254
if parent_sampled is not None:
196255
sample_rate = bool(parent_sampled)
197256
sample_rate_to_propagate = (
@@ -215,17 +274,23 @@ def should_sample(
215274
if client.monitor.downsample_factor > 0:
216275
sample_rate_to_propagate = sample_rate
217276

218-
# Roll the dice on sample rate
277+
# Compare sample_rand to sample_rate to make the final sampling decision
219278
sample_rate = float(cast("Union[bool, float, int]", sample_rate))
220-
sampled = random.random() < sample_rate
279+
sampled = sample_rand < sample_rate
221280

222281
if sampled:
223282
return sampled_result(
224-
parent_span_context, attributes, sample_rate=sample_rate_to_propagate
283+
parent_span_context,
284+
attributes,
285+
sample_rate=sample_rate_to_propagate,
286+
sample_rand=None if sample_rand == parent_sample_rand else sample_rand,
225287
)
226288
else:
227289
return dropped_result(
228-
parent_span_context, attributes, sample_rate=sample_rate_to_propagate
290+
parent_span_context,
291+
attributes,
292+
sample_rate=sample_rate_to_propagate,
293+
sample_rand=None if sample_rand == parent_sample_rand else sample_rand,
229294
)
230295

231296
def get_description(self) -> str:

sentry_sdk/integrations/opentelemetry/scope.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,6 @@
3030
from sentry_sdk.integrations.opentelemetry.utils import trace_state_from_baggage
3131
from sentry_sdk.scope import Scope, ScopeType
3232
from sentry_sdk.tracing import Span
33-
from sentry_sdk.utils import logger
3433
from sentry_sdk._types import TYPE_CHECKING
3534

3635
if TYPE_CHECKING:

sentry_sdk/integrations/stdlib.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -73,7 +73,7 @@ def putrequest(self, method, url, *args, **kwargs):
7373

7474
client = sentry_sdk.get_client()
7575
if client.get_integration(StdlibIntegration) is None or is_sentry_url(
76-
client, f"{host}:{port}"
76+
client, f"{host}:{port}" # noqa: E231
7777
):
7878
return real_putrequest(self, method, url, *args, **kwargs)
7979

sentry_sdk/tracing.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,6 @@
2020
from sentry_sdk.utils import (
2121
_serialize_span_attribute,
2222
get_current_thread_meta,
23-
logger,
2423
should_be_treated_as_error,
2524
)
2625

sentry_sdk/tracing_utils.py

Lines changed: 77 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
import contextlib
2+
import decimal
23
import inspect
34
import os
45
import re
@@ -392,6 +393,9 @@ def from_incoming_data(cls, incoming_data):
392393
propagation_context = PropagationContext()
393394
propagation_context.update(sentrytrace_data)
394395

396+
if propagation_context is not None:
397+
propagation_context._fill_sample_rand()
398+
395399
return propagation_context
396400

397401
@property
@@ -433,6 +437,78 @@ def update(self, other_dict):
433437
except AttributeError:
434438
pass
435439

440+
def _fill_sample_rand(self):
441+
# type: () -> None
442+
"""
443+
Ensure that there is a valid sample_rand value in the baggage.
444+
445+
If there is a valid sample_rand value in the baggage, we keep it.
446+
Otherwise, we generate a sample_rand value according to the following:
447+
448+
- If we have a parent_sampled value and a sample_rate in the DSC, we compute
449+
a sample_rand value randomly in the range:
450+
- [0, sample_rate) if parent_sampled is True,
451+
- or, in the range [sample_rate, 1) if parent_sampled is False.
452+
453+
- If either parent_sampled or sample_rate is missing, we generate a random
454+
value in the range [0, 1).
455+
456+
The sample_rand is deterministically generated from the trace_id, if present.
457+
458+
This function does nothing if there is no dynamic_sampling_context.
459+
"""
460+
if self.dynamic_sampling_context is None or self.baggage is None:
461+
return
462+
463+
sentry_baggage = self.baggage.sentry_items
464+
465+
sample_rand = None
466+
if sentry_baggage.get("sample_rand"):
467+
try:
468+
sample_rand = Decimal(sentry_baggage["sample_rand"])
469+
except Exception:
470+
logger.debug(
471+
f"Failed to convert incoming sample_rand to Decimal: {sample_rand}"
472+
)
473+
474+
if sample_rand is not None and 0 <= sample_rand < 1:
475+
# sample_rand is present and valid, so don't overwrite it
476+
return
477+
478+
sample_rate = None
479+
if sentry_baggage.get("sample_rate"):
480+
try:
481+
sample_rate = float(sentry_baggage["sample_rate"])
482+
except Exception:
483+
logger.debug(
484+
f"Failed to convert incoming sample_rate to float: {sample_rate}"
485+
)
486+
487+
lower, upper = _sample_rand_range(self.parent_sampled, sample_rate)
488+
489+
try:
490+
sample_rand = _generate_sample_rand(self.trace_id, interval=(lower, upper))
491+
except ValueError:
492+
# ValueError is raised if the interval is invalid, i.e. lower >= upper.
493+
# lower >= upper might happen if the incoming trace's sampled flag
494+
# and sample_rate are inconsistent, e.g. sample_rate=0.0 but sampled=True.
495+
# We cannot generate a sensible sample_rand value in this case.
496+
logger.debug(
497+
f"Could not backfill sample_rand, since parent_sampled={self.parent_sampled} "
498+
f"and sample_rate={sample_rate}."
499+
)
500+
return
501+
502+
self.baggage.sentry_items["sample_rand"] = f"{sample_rand:.6f}" # noqa: E231
503+
504+
def _sample_rand(self):
505+
# type: () -> Optional[str]
506+
"""Convenience method to get the sample_rand value from the baggage."""
507+
if self.baggage is None:
508+
return None
509+
510+
return self.baggage.sentry_items.get("sample_rand")
511+
436512
def __repr__(self):
437513
# type: (...) -> str
438514
return "<PropagationContext _trace_id={} _span_id={} parent_span_id={} parent_sampled={} baggage={} dynamic_sampling_context={}>".format(
@@ -684,13 +760,11 @@ def get_current_span(scope=None):
684760
return current_span
685761

686762

687-
# XXX-potel-ivana: use this
688763
def _generate_sample_rand(
689764
trace_id, # type: Optional[str]
690-
*,
691765
interval=(0.0, 1.0), # type: tuple[float, float]
692766
):
693-
# type: (...) -> Any
767+
# type: (...) -> Optional[decimal.Decimal]
694768
"""Generate a sample_rand value from a trace ID.
695769
696770
The generated value will be pseudorandomly chosen from the provided
@@ -709,15 +783,11 @@ def _generate_sample_rand(
709783
while sample_rand >= upper:
710784
sample_rand = rng.uniform(lower, upper)
711785

712-
# Round down to exactly six decimal-digit precision.
713-
# Setting the context is needed to avoid an InvalidOperation exception
714-
# in case the user has changed the default precision.
715786
return Decimal(sample_rand).quantize(
716787
Decimal("0.000001"), rounding=ROUND_DOWN, context=Context(prec=6)
717788
)
718789

719790

720-
# XXX-potel-ivana: use this
721791
def _sample_rand_range(parent_sampled, sample_rate):
722792
# type: (Optional[bool], Optional[float]) -> tuple[float, float]
723793
"""

0 commit comments

Comments
 (0)