refactor(tracer): move span sampling to end of start_span (#5352)

Yun-Kim · ZStriker19 · web-flow · commit 1dd239cfbd82 · 2023-03-29T15:03:25.000-04:00
Currently, the tracer's `_start_span()` method applies sampling rules before all tags have been set on the span. This ordering was indirectly the cause of the bug fixed in #5339, because the sampling decision was made while some tags were still missing. This PR moves the sampling logic to the end of the method, after all other tags have been set. In doing so, it also removes the duplicate code introduced in #5339. No new tests are required because no new functionality is introduced; only the point at which sampling decisions are made in `start_span()` has shifted. ## Checklist - [x] Change(s) are motivated and described in the PR description. - [x] Testing strategy is described if automated tests are not included in the PR. - [x] Risk is outlined (performance impact, potential for breakage, maintainability, etc). - [x] Change is maintainable (easy to change, telemetry, documentation). - [x] [Library release note guidelines](https://ddtrace.readthedocs.io/en/stable/contributing.html#Release-Note-Guidelines) are followed. - [x] Documentation is included (in-code, generated user docs, [public corp docs](https://github.com/DataDog/documentation/)). - [x] Author is aware of the performance implications of this PR as reported in the benchmarks PR comment. ## Reviewer Checklist - [x] Title is accurate. - [x] No unnecessary changes are introduced. - [x] Description motivates each change. - [x] Avoids breaking [API](https://ddtrace.readthedocs.io/en/stable/versioning.html#interfaces) changes unless absolutely necessary. - [x] Testing strategy adequately addresses listed risk(s). - [x] Change is maintainable (easy to change, telemetry, documentation). - [x] Release note makes sense to a user of the library. - [x] Reviewer is aware of, and discussed the performance implications of this PR as reported in the benchmarks PR comment. --------- Co-authored-by: Zachary Groves <32471391+ZStriker19@users.noreply.github.com>
diff --git a/ddtrace/tracer.py b/ddtrace/tracer.py
@@ -695,34 +695,6 @@ def _start_span(
             if config.report_hostname:
                 span.set_tag_str(HOSTNAME_KEY, hostname.get_hostname())
 
-            if config.env:
-                span.set_tag_str(ENV_KEY, config.env)  # env tag is used by _sampler.sample
-            span.sampled = self._sampler.sample(span)
-            # Old behavior
-            # DEV: The new sampler sets metrics and priority sampling on the span for us
-            if not isinstance(self._sampler, DatadogSampler):
-                if span.sampled:
-                    # When doing client sampling in the client, keep the sample rate so that we can
-                    # scale up statistics in the next steps of the pipeline.
-                    if isinstance(self._sampler, RateSampler):
-                        span.set_metric(SAMPLE_RATE_METRIC_KEY, self._sampler.sample_rate)
-
-                    if self._priority_sampler:
-                        # At this stage, it's important to have the service set. If unset,
-                        # priority sampler will use the default sampling rate, which might
-                        # lead to oversampling (that is, dropping too many traces).
-                        if self._priority_sampler.sample(span):
-                            context.sampling_priority = AUTO_KEEP
-                        else:
-                            context.sampling_priority = AUTO_REJECT
-                else:
-                    if self._priority_sampler:
-                        # If dropped by the local sampler, distributed instrumentation can drop it too.
-                        context.sampling_priority = AUTO_REJECT
-            else:
-                # We must always mark the span as sampled so it is forwarded to the agent
-                span.sampled = True
-
         if not span._parent:
             span.set_tag_str("runtime-id", get_runtime_id())
             span._metrics[PID] = self._pid
@@ -753,12 +725,39 @@ def _start_span(
         if service and service not in self._services and self._is_span_internal(span):
             self._services.add(service)
 
+        if not trace_id:
+            span.sampled = self._sampler.sample(span)
+            # Old behavior
+            # DEV: The new sampler sets metrics and priority sampling on the span for us
+            if not isinstance(self._sampler, DatadogSampler):
+                if span.sampled:
+                    # When doing client sampling in the client, keep the sample rate so that we can
+                    # scale up statistics in the next steps of the pipeline.
+                    if isinstance(self._sampler, RateSampler):
+                        span.set_metric(SAMPLE_RATE_METRIC_KEY, self._sampler.sample_rate)
+
+                    if self._priority_sampler:
+                        # At this stage, it's important to have the service set. If unset,
+                        # priority sampler will use the default sampling rate, which might
+                        # lead to oversampling (that is, dropping too many traces).
+                        if self._priority_sampler.sample(span):
+                            context.sampling_priority = AUTO_KEEP
+                        else:
+                            context.sampling_priority = AUTO_REJECT
+                else:
+                    if self._priority_sampler:
+                        # If dropped by the local sampler, distributed instrumentation can drop it too.
+                        context.sampling_priority = AUTO_REJECT
+            else:
+                # We must always mark the span as sampled so it is forwarded to the agent
+                span.sampled = True
+
         # Only call span processors if the tracer is enabled
         if self.enabled:
             for p in self._span_processors:
                 p.on_span_start(span)
-
         self._hooks.emit(self.__class__.start_span, span)
+
         return span
 
     start_span = _start_span