Skip to content

Commit 336d6f8

Browse files
github-actions[bot]mabdinursanchda
authored
fix(tracing): ensure p is on the tracestate of active spans [backport 2.7] (#8649)
Backport 90a3e3f from #8569 to 2.7. Currently last datadog parent id is always added to the tracestate in `Htttpropagator.inject`. This is incorrect. If a Datadog span is not active (Context object is active) then the `p` value in the tracestate should forward the `_dd.parent_id` value. ## Checklist - [x] Change(s) are motivated and described in the PR description - [x] Testing strategy is described if automated tests are not included in the PR - [x] Risks are described (performance impact, potential for breakage, maintainability) - [x] Change is maintainable (easy to change, telemetry, documentation) - [x] [Library release note guidelines](https://ddtrace.readthedocs.io/en/stable/releasenotes.html) are followed or label `changelog/no-changelog` is set - [x] Documentation is included (in-code, generated user docs, [public corp docs](https://github.com/DataDog/documentation/)) - [x] Backport labels are set (if [applicable](https://ddtrace.readthedocs.io/en/latest/contributing.html#backporting)) - [x] If this PR changes the public interface, I've notified `@DataDog/apm-tees`. - [x] If change touches code that signs or publishes builds or packages, or handles credentials of any kind, I've requested a review from `@DataDog/security-design-and-guidance`. ## Reviewer Checklist - [x] Title is accurate - [x] All changes are related to the pull request's stated goal - [x] Description motivates each change - [x] Avoids breaking [API](https://ddtrace.readthedocs.io/en/stable/versioning.html#interfaces) changes - [x] Testing strategy adequately addresses listed risks - [x] Change is maintainable (easy to change, telemetry, documentation) - [x] Release note makes sense to a user of the library - [x] Author has acknowledged and discussed the performance implications of this PR as reported in the benchmarks PR comment - [x] Backport labels are set in a manner that is consistent with the [release branch maintenance policy](https://ddtrace.readthedocs.io/en/latest/contributing.html#backporting) Co-authored-by: Munir Abdinur <[email protected]> Co-authored-by: David Sanchez <[email protected]>
1 parent 6677a11 commit 336d6f8

File tree

12 files changed

+114
-39
lines changed

12 files changed

+114
-39
lines changed

ddtrace/_trace/context.py

Lines changed: 14 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -30,8 +30,9 @@
3030
Optional[int], # span_id
3131
_MetaDictType, # _meta
3232
_MetricDictType, # _metrics
33-
list[SpanLink],
34-
dict[str, Any],
33+
list[SpanLink], # span_links
34+
dict[str, Any], # baggage
35+
bool, # is_remote
3536
]
3637

3738

@@ -45,7 +46,7 @@ class Context(object):
4546
boundaries.
4647
"""
4748

48-
__slots__ = ["trace_id", "span_id", "_lock", "_meta", "_metrics", "_span_links", "_baggage"]
49+
__slots__ = ["trace_id", "span_id", "_lock", "_meta", "_metrics", "_span_links", "_baggage", "_is_remote"]
4950

5051
def __init__(
5152
self,
@@ -58,13 +59,15 @@ def __init__(
5859
lock=None, # type: Optional[threading.RLock]
5960
span_links=None, # type: Optional[list[SpanLink]]
6061
baggage=None, # type: Optional[dict[str, Any]]
62+
is_remote=True, # type: bool
6163
):
6264
self._meta = meta if meta is not None else {} # type: _MetaDictType
6365
self._metrics = metrics if metrics is not None else {} # type: _MetricDictType
6466
self._baggage = baggage if baggage is not None else {} # type: dict[str, Any]
6567

6668
self.trace_id = trace_id # type: Optional[int]
6769
self.span_id = span_id # type: Optional[int]
70+
self._is_remote = is_remote # type: bool
6871

6972
if dd_origin is not None and _DD_ORIGIN_INVALID_CHARS_REGEX.search(dd_origin) is None:
7073
self._meta[ORIGIN_KEY] = dd_origin
@@ -91,13 +94,14 @@ def __getstate__(self):
9194
self._meta,
9295
self._metrics,
9396
self._span_links,
94-
self._baggage
97+
self._baggage,
98+
self._is_remote,
9599
# Note: self._lock is not serializable
96100
)
97101

98102
def __setstate__(self, state):
99103
# type: (_ContextState) -> None
100-
self.trace_id, self.span_id, self._meta, self._metrics, self._span_links, self._baggage = state
104+
self.trace_id, self.span_id, self._meta, self._metrics, self._span_links, self._baggage, self._is_remote = state
101105
# We cannot serialize and lock, so we must recreate it unless we already have one
102106
self._lock = threading.RLock()
103107

@@ -111,6 +115,8 @@ def _with_span(self, span):
111115
metrics=self._metrics,
112116
lock=self._lock,
113117
baggage=self._baggage,
118+
span_links=self._span_links,
119+
is_remote=self._is_remote,
114120
)
115121

116122
def _update_tags(self, span):
@@ -251,18 +257,20 @@ def __eq__(self, other):
251257
and self._metrics == other._metrics
252258
and self._span_links == other._span_links
253259
and self._baggage == other._baggage
260+
and self._is_remote == other._is_remote
254261
)
255262
return False
256263

257264
def __repr__(self):
258265
# type: () -> str
259-
return "Context(trace_id=%s, span_id=%s, _meta=%s, _metrics=%s, _span_links=%s, _baggage=%s)" % (
266+
return "Context(trace_id=%s, span_id=%s, _meta=%s, _metrics=%s, _span_links=%s, _baggage=%s, _is_remote=%s)" % (
260267
self.trace_id,
261268
self.span_id,
262269
self._meta,
263270
self._metrics,
264271
self._span_links,
265272
self._baggage,
273+
self._is_remote,
266274
)
267275

268276
__str__ = __repr__

ddtrace/_trace/processor/__init__.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818
from ddtrace.constants import USER_KEEP
1919
from ddtrace.internal import gitmetadata
2020
from ddtrace.internal.constants import HIGHER_ORDER_TRACE_ID_BITS
21+
from ddtrace.internal.constants import LAST_DD_PARENT_ID_KEY
2122
from ddtrace.internal.constants import MAX_UINT_64BITS
2223
from ddtrace.internal.logger import get_logger
2324
from ddtrace.internal.sampling import SpanSamplingRule
@@ -218,6 +219,10 @@ def process_trace(self, trace):
218219
if chunk_root.trace_id > MAX_UINT_64BITS:
219220
trace_id_hob = _get_64_highest_order_bits_as_hex(chunk_root.trace_id)
220221
chunk_root.set_tag_str(HIGHER_ORDER_TRACE_ID_BITS, trace_id_hob)
222+
223+
if LAST_DD_PARENT_ID_KEY in chunk_root._meta and chunk_root._parent is not None:
224+
# we should only set the last parent id on local root spans
225+
del chunk_root._meta[LAST_DD_PARENT_ID_KEY]
221226
return trace
222227

223228

ddtrace/_trace/span.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -546,7 +546,7 @@ def context(self):
546546
# type: () -> Context
547547
"""Return the trace context for this span."""
548548
if self._context is None:
549-
self._context = Context(trace_id=self.trace_id, span_id=self.span_id)
549+
self._context = Context(trace_id=self.trace_id, span_id=self.span_id, is_remote=False)
550550
return self._context
551551

552552
def link_span(self, context, attributes=None):

ddtrace/_trace/tracer.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -642,6 +642,7 @@ def _start_span(
642642
sampling_priority=child_of.context.sampling_priority,
643643
span_id=child_of.span_id,
644644
trace_id=child_of.trace_id,
645+
is_remote=False,
645646
)
646647

647648
# If the child_of span was active then activate the new context
@@ -659,7 +660,7 @@ def _start_span(
659660
context = child_of.context
660661
parent = child_of
661662
else:
662-
context = Context()
663+
context = Context(is_remote=False)
663664

664665
trace_id = context.trace_id
665666
parent_id = context.span_id

ddtrace/internal/constants.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,10 +19,12 @@
1919
)
2020
W3C_TRACESTATE_KEY = "tracestate"
2121
W3C_TRACEPARENT_KEY = "traceparent"
22+
W3C_TRACESTATE_PARENT_ID_KEY = "p"
2223
W3C_TRACESTATE_ORIGIN_KEY = "o"
2324
W3C_TRACESTATE_SAMPLING_PRIORITY_KEY = "s"
2425
DEFAULT_SAMPLING_RATE_LIMIT = 100
2526
SAMPLING_DECISION_TRACE_TAG_KEY = "_dd.p.dm"
27+
LAST_DD_PARENT_ID_KEY = "_dd.parent_id"
2628
DEFAULT_SERVICE_NAME = "unnamed-python-service"
2729
# Used to set the name of an integration on a span
2830
COMPONENT = "component"

ddtrace/internal/utils/http.py

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@
2525
from ddtrace.internal.constants import DEFAULT_TIMEOUT
2626
from ddtrace.internal.constants import SAMPLING_DECISION_TRACE_TAG_KEY
2727
from ddtrace.internal.constants import W3C_TRACESTATE_ORIGIN_KEY
28+
from ddtrace.internal.constants import W3C_TRACESTATE_PARENT_ID_KEY
2829
from ddtrace.internal.constants import W3C_TRACESTATE_SAMPLING_PRIORITY_KEY
2930
from ddtrace.internal.http import HTTPConnection
3031
from ddtrace.internal.http import HTTPSConnection
@@ -205,6 +206,16 @@ def w3c_encode_tag(args):
205206
return tag_val.replace("=", "~")
206207

207208

209+
def w3c_tracestate_add_p(tracestate, span_id):
210+
# Adds last datadog parent_id to tracestate. This tag is used to reconnect a trace with non-datadog spans
211+
p_member = "{}:{:016x}".format(W3C_TRACESTATE_PARENT_ID_KEY, span_id)
212+
if "dd=" in tracestate:
213+
return tracestate.replace("dd=", f"dd={p_member};")
214+
elif tracestate:
215+
return f"dd={p_member},{tracestate}"
216+
return f"dd={p_member}"
217+
218+
208219
class Response(object):
209220
"""
210221
Custom API Response object to represent a response from calling the API.

ddtrace/opentelemetry/_span.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
from ddtrace.internal.logger import get_logger
1616
from ddtrace.internal.utils.formats import flatten_key_value
1717
from ddtrace.internal.utils.formats import is_sequence
18+
from ddtrace.internal.utils.http import w3c_tracestate_add_p
1819

1920

2021
if TYPE_CHECKING:
@@ -137,7 +138,8 @@ def get_span_context(self):
137138
ts = None
138139
tf = TraceFlags.DEFAULT
139140
if self._ddspan.context:
140-
ts = TraceState.from_header([self._ddspan.context._tracestate])
141+
ts_str = w3c_tracestate_add_p(self._ddspan.context._tracestate, self._ddspan.span_id)
142+
ts = TraceState.from_header([ts_str])
141143
if self._ddspan.context.sampling_priority and self._ddspan.context.sampling_priority > 0:
142144
tf = TraceFlags.SAMPLED
143145

ddtrace/propagation/http.py

Lines changed: 13 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,7 @@
3535
from ..internal.constants import _PROPAGATION_STYLE_NONE
3636
from ..internal.constants import _PROPAGATION_STYLE_W3C_TRACECONTEXT
3737
from ..internal.constants import HIGHER_ORDER_TRACE_ID_BITS as _HIGHER_ORDER_TRACE_ID_BITS
38+
from ..internal.constants import LAST_DD_PARENT_ID_KEY
3839
from ..internal.constants import MAX_UINT_64BITS as _MAX_UINT_64BITS
3940
from ..internal.constants import PROPAGATION_STYLE_B3_MULTI
4041
from ..internal.constants import PROPAGATION_STYLE_B3_SINGLE
@@ -45,6 +46,7 @@
4546
from ..internal.sampling import SAMPLING_DECISION_TRACE_TAG_KEY
4647
from ..internal.sampling import SamplingMechanism
4748
from ..internal.sampling import validate_sampling_decision
49+
from ..internal.utils.http import w3c_tracestate_add_p
4850
from ._utils import get_wsgi_header
4951

5052

@@ -814,7 +816,7 @@ def _get_context(trace_id, span_id, trace_flag, ts, meta=None):
814816
sampling_priority_ts, other_propagated_tags, origin, lpid = tracestate_values
815817
meta.update(other_propagated_tags.items())
816818
if lpid:
817-
meta["_dd.parent_id"] = lpid
819+
meta[LAST_DD_PARENT_ID_KEY] = lpid
818820

819821
sampling_priority = _TraceContext._get_sampling_priority(trace_flag, sampling_priority_ts, origin)
820822
else:
@@ -834,15 +836,17 @@ def _inject(span_context, headers):
834836
tp = span_context._traceparent
835837
if tp:
836838
headers[_HTTP_HEADER_TRACEPARENT] = tp
837-
# only inject tracestate if traceparent injected: https://www.w3.org/TR/trace-context/#tracestate-header
838-
ts = span_context._tracestate
839-
# Adds last datadog parent_id to tracestate. This tag is used to reconnect a trace with non-datadog spans
840-
if "dd=" in ts:
841-
ts = ts.replace("dd=", "dd=p:{:016x};".format(span_context.span_id or 0))
839+
if span_context._is_remote is False:
840+
# Datadog Span is active, so the current span_id is the last datadog span_id
841+
headers[_HTTP_HEADER_TRACESTATE] = w3c_tracestate_add_p(
842+
span_context._tracestate, span_context.span_id or 0
843+
)
844+
elif LAST_DD_PARENT_ID_KEY in span_context._meta:
845+
# Datadog Span is not active, propagate the last datadog span_id
846+
span_id = int(span_context._meta[LAST_DD_PARENT_ID_KEY], 16)
847+
headers[_HTTP_HEADER_TRACESTATE] = w3c_tracestate_add_p(span_context._tracestate, span_id)
842848
else:
843-
ts = "dd=p:{:016x}".format(span_context.span_id or 0)
844-
845-
headers[_HTTP_HEADER_TRACESTATE] = ts
849+
headers[_HTTP_HEADER_TRACESTATE] = span_context._tracestate
846850

847851

848852
class _NOP_Propagator:
Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
---
2+
fixes:
3+
- |
4+
otel: Ensures that the last datadog parent_id is added to w3c distributed tracing headers generated by the OpenTelemetry API.

tests/opentelemetry/test_context.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -89,7 +89,7 @@ def _subprocess_task(parent_span_context, errors):
8989
ot_tracer._tracer.flush()
9090

9191

92-
@pytest.mark.snapshot
92+
@pytest.mark.snapshot(ignores=["meta.tracestate"])
9393
def test_otel_trace_across_fork(oteltracer):
9494
errors = multiprocessing.Queue()
9595
with oteltracer.start_as_current_span("root") as root:
@@ -102,7 +102,7 @@ def test_otel_trace_across_fork(oteltracer):
102102
assert errors.empty(), errors.get()
103103

104104

105-
@pytest.mark.snapshot(wait_for_num_traces=1)
105+
@pytest.mark.snapshot(wait_for_num_traces=1, ignores=["meta.tracestate"])
106106
@pytest.mark.parametrize("decision", [MANUAL_KEEP_KEY, MANUAL_DROP_KEY], ids=["manual.keep", "manual.drop"])
107107
def test_sampling_decisions_across_processes(oteltracer, decision):
108108
# sampling decision in the subprocess task should be the same as the parent

0 commit comments

Comments
 (0)