Skip to content

Commit b63f2b8

Browse files
ZStriker19mabdinur
andauthored
chore(w3c): replace ~ with _ and = with ~ (#4822) [backport #4822 to 1.7] (#4871)
## Description Upon review we found that `:` should be allowed in the tags value: https://docs.datadoghq.com/getting_started/tagging/#define-tags We were using `:` as a way to represent an "encoded" = in the propagated trace tag values, so we decided to change the "encoded" value of the `=` to `~`. Therefore, we should allow `=` in tag values. In order to do this in tracestate, we need to encode it. Therefore we encode `~`s in the value as `_` first, and then encode `=` as `~`. ## Checklist - [x] Followed the [library release note guidelines](https://ddtrace.readthedocs.io/en/stable/contributing.html#Release-Note-Guidelines) when writing a release note. - [ ] Add additional sections for `feat` and `fix` pull requests. - [ ] [Library documentation](https://github.com/DataDog/dd-trace-py/tree/1.x/docs) and/or [Datadog's documentation site](https://github.com/DataDog/documentation/) is updated. Link to doc PR in description. <!-- Copy and paste the relevant snippet based on the type of pull request --> ## Reviewer Checklist - [ ] Title is accurate. - [ ] Description motivates each change. - [ ] No unnecessary changes were introduced in this PR. - [ ] Avoid breaking [API](https://ddtrace.readthedocs.io/en/stable/versioning.html#interfaces) changes unless absolutely necessary. - [ ] Tests provided or description of manual testing performed is included in the code or PR. - [ ] Release note has been added and follows the [library release note guidelines](https://ddtrace.readthedocs.io/en/stable/contributing.html#Release-Note-Guidelines), or else `changelog/no-changelog` label added. - [ ] All relevant GitHub issues are correctly linked. - [ ] Backports are identified and tagged with Mergifyio. Co-authored-by: Munir Abdinur <[email protected]> ## Description <!-- Briefly describe the change and why it was required. --> <!-- If this is a breaking change, explain why it is necessary. Breaking changes must append `!` after the type/scope. See https://ddtrace.readthedocs.io/en/stable/contributing.html for more details. --> ## Checklist - [ ] Followed the [library release note guidelines](https://ddtrace.readthedocs.io/en/stable/contributing.html#Release-Note-Guidelines) when writing a release note. - [ ] Add additional sections for `feat` and `fix` pull requests. - [ ] [Library documentation](https://github.com/DataDog/dd-trace-py/tree/1.x/docs) and/or [Datadog's documentation site](https://github.com/DataDog/documentation/) is updated. Link to doc PR in description. <!-- Copy and paste the relevant snippet based on the type of pull request --> <!-- START feat --> ## Motivation <!-- Expand on why the change is required, include relevant context for reviewers --> ## Design <!-- Include benefits from the change as well as possible drawbacks and trade-offs --> ## Testing strategy <!-- Describe the automated tests and/or the steps for manual testing. <!-- END feat --> <!-- START fix --> ## Relevant issue(s) <!-- Link the pull request to any issues related to the fix. Use keywords for links to automate closing the issues once the pull request is merged. --> ## Testing strategy <!-- Describe any added regression tests and/or the manual testing performed. --> <!-- END fix --> ## Reviewer Checklist - [ ] Title is accurate. - [ ] Description motivates each change. - [ ] No unnecessary changes were introduced in this PR. - [ ] Avoid breaking [API](https://ddtrace.readthedocs.io/en/stable/versioning.html#interfaces) changes unless absolutely necessary. - [ ] Tests provided or description of manual testing performed is included in the code or PR. - [ ] Release note has been added and follows the [library release note guidelines](https://ddtrace.readthedocs.io/en/stable/contributing.html#Release-Note-Guidelines), or else `changelog/no-changelog` label added. - [ ] All relevant GitHub issues are correctly linked. - [ ] Backports are identified and tagged with Mergifyio. Co-authored-by: Munir Abdinur <[email protected]>
1 parent 6829075 commit b63f2b8

File tree

5 files changed

+96
-21
lines changed

5 files changed

+96
-21
lines changed

ddtrace/internal/utils/http.py

Lines changed: 29 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
from typing import ContextManager
77
from typing import Generator
88
from typing import Optional
9+
from typing import Pattern
910
from typing import Tuple
1011
from typing import Union
1112

@@ -19,7 +20,8 @@
1920
from ddtrace.internal.utils.cache import cached
2021

2122

22-
_W3C_TRACESTATE_INVALID_CHARS_REGEX = r",|;|:|[^\x20-\x7E]+"
23+
_W3C_TRACESTATE_INVALID_CHARS_REGEX_VALUE = re.compile(r",|;|~|[^\x20-\x7E]+")
24+
_W3C_TRACESTATE_INVALID_CHARS_REGEX_KEY = re.compile(r",| |=|[^\x20-\x7E]+")
2325

2426

2527
Connector = Callable[[], ContextManager[compat.httplib.HTTPConnection]]
@@ -157,15 +159,22 @@ def w3c_get_dd_list_member(context):
157159
if context.sampling_priority is not None:
158160
tags.append("{}:{}".format(W3C_TRACESTATE_SAMPLING_PRIORITY_KEY, context.sampling_priority))
159161
if context.dd_origin:
160-
# the origin value has specific values that are allowed.
161-
tags.append("{}:{}".format(W3C_TRACESTATE_ORIGIN_KEY, re.sub(r",|;|=|[^\x20-\x7E]+", "_", context.dd_origin)))
162+
tags.append(
163+
"{}:{}".format(
164+
W3C_TRACESTATE_ORIGIN_KEY,
165+
w3c_encode_tag((_W3C_TRACESTATE_INVALID_CHARS_REGEX_VALUE, "_", context.dd_origin)),
166+
)
167+
)
168+
162169
sampling_decision = context._meta.get(SAMPLING_DECISION_TRACE_TAG_KEY)
163170
if sampling_decision:
164-
tags.append("t.dm:{}".format(re.sub(_W3C_TRACESTATE_INVALID_CHARS_REGEX, "_", sampling_decision)))
171+
tags.append(
172+
"t.dm:{}".format((w3c_encode_tag((_W3C_TRACESTATE_INVALID_CHARS_REGEX_VALUE, "_", sampling_decision))))
173+
)
165174
# since this can change, we need to grab the value off the current span
166-
usr_id_key = context._meta.get(USER_ID_KEY)
167-
if usr_id_key:
168-
tags.append("t.usr.id:{}".format(re.sub(_W3C_TRACESTATE_INVALID_CHARS_REGEX, "_", usr_id_key)))
175+
usr_id = context._meta.get(USER_ID_KEY)
176+
if usr_id:
177+
tags.append("t.usr.id:{}".format(w3c_encode_tag((_W3C_TRACESTATE_INVALID_CHARS_REGEX_VALUE, "_", usr_id))))
169178

170179
current_tags_len = sum(len(i) for i in tags)
171180
for k, v in context._meta.items():
@@ -176,10 +185,11 @@ def w3c_get_dd_list_member(context):
176185
and k not in [SAMPLING_DECISION_TRACE_TAG_KEY, USER_ID_KEY]
177186
):
178187
# for key replace ",", "=", and characters outside the ASCII range 0x20 to 0x7E
179-
# for value replace ",", ";", ":" and characters outside the ASCII range 0x20 to 0x7E
188+
# for value replace ",", ";", "~" and characters outside the ASCII range 0x20 to 0x7E
189+
k = k.replace("_dd.p.", "t.")
180190
next_tag = "{}:{}".format(
181-
re.sub("_dd.p.", "t.", re.sub(r",| |=|[^\x20-\x7E]+", "_", k)),
182-
re.sub(_W3C_TRACESTATE_INVALID_CHARS_REGEX, "_", v),
191+
w3c_encode_tag((_W3C_TRACESTATE_INVALID_CHARS_REGEX_KEY, "_", k)),
192+
w3c_encode_tag((_W3C_TRACESTATE_INVALID_CHARS_REGEX_VALUE, "_", v)),
183193
)
184194
# we need to keep the total length under 256 char
185195
potential_current_tags_len = current_tags_len + len(next_tag)
@@ -190,3 +200,12 @@ def w3c_get_dd_list_member(context):
190200
log.debug("tracestate would exceed 256 char limit with tag: %s. Tag will not be added.", next_tag)
191201

192202
return ";".join(tags)
203+
204+
205+
@cached()
206+
def w3c_encode_tag(args):
207+
# type: (Tuple[Pattern, str, str]) -> str
208+
pattern, replacement, tag_val = args
209+
tag_val = pattern.sub(replacement, tag_val)
210+
# replace = with ~ if it wasn't already replaced by the regex
211+
return tag_val.replace("=", "~")

ddtrace/propagation/http.py

Lines changed: 13 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -560,6 +560,11 @@ class _TraceContext:
560560
``_dd.p.`` prefix = ``t.``
561561
"""
562562

563+
@staticmethod
564+
def decode_tag_val(tag_val):
565+
# type str -> str
566+
return tag_val.replace("~", "=")
567+
563568
@staticmethod
564569
def _get_traceparent_values(tp):
565570
# type: (str) -> Tuple[int, int, int]
@@ -616,7 +621,8 @@ def _get_tracestate_values(ts):
616621
if list_mem.startswith("dd="):
617622
# cut out dd= before turning into dict
618623
list_mem = list_mem[3:]
619-
dd = dict(item.split(":") for item in list_mem.split(";"))
624+
# since tags can have a value with a :, we need to only split on the first instance of :
625+
dd = dict(item.split(":", 1) for item in list_mem.split(";"))
620626

621627
# parse out values
622628
if dd:
@@ -627,8 +633,13 @@ def _get_tracestate_values(ts):
627633
sampling_priority_ts_int = None
628634

629635
origin = dd.get("o")
636+
if origin:
637+
# we encode "=" as "~" in tracestate so need to decode here
638+
origin = _TraceContext.decode_tag_val(origin)
630639
# need to convert from t. to _dd.p.
631-
other_propagated_tags = {"_dd.p.%s" % k[2:]: v for (k, v) in dd.items() if (k.startswith("t."))}
640+
other_propagated_tags = {
641+
"_dd.p.%s" % k[2:]: _TraceContext.decode_tag_val(v) for (k, v) in dd.items() if k.startswith("t.")
642+
}
632643

633644
return sampling_priority_ts_int, other_propagated_tags, origin
634645
else:

tests/tracer/test_context.py

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -267,14 +267,14 @@ def test_traceparent(context, expected_traceparent):
267267
Context(),
268268
"",
269269
),
270-
( # for value replace ",", ";", ":" and characters outside the ASCII range 0x20 to 0x7E with _
270+
( # for value replace ",", ";" and characters outside the ASCII range 0x20 to 0x7E with _
271271
Context(
272272
trace_id=11803532876627986230,
273273
span_id=67667974448284343,
274274
sampling_priority=1,
275275
meta={
276276
"tracestate": "dd=s:1;o:rum;t.dm:-4;t.usr.id:baz64",
277-
"_dd.p.dm": ";5:",
277+
"_dd.p.dm": ";5;",
278278
"_dd.p.usr.id": "b,z64,",
279279
"_dd.p.unk": ";2",
280280
},
@@ -307,7 +307,8 @@ def test_traceparent(context, expected_traceparent):
307307
},
308308
dd_origin=";r,um=",
309309
),
310-
"dd=s:1;o:_r_um_",
310+
# = is encoded as ~
311+
"dd=s:1;o:_r_um~",
311312
),
312313
],
313314
ids=[

tests/tracer/test_propagation.py

Lines changed: 33 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -650,11 +650,39 @@ def test_extract_traceparent(caplog, headers, expected_tuple, expected_logging,
650650
["received invalid dd header value in tracestate: 'dd=invalid,congo=123'"],
651651
ValueError,
652652
),
653-
(
653+
( # "ts_string,expected_tuple,expected_logging,expected_exception",
654654
"dd=foo|bar:hi|l¢¢¢¢¢¢:",
655+
(None, {}, None),
656+
None,
657+
None,
658+
),
659+
(
660+
"dd=s:2;o:rum;t.dm:-4;t.usr.id:baz6~~~4",
661+
# sampling_priority_ts, other_propagated_tags, origin
662+
(
663+
2,
664+
{
665+
"_dd.p.dm": "-4",
666+
"_dd.p.usr.id": "baz6===4",
667+
},
668+
"rum",
669+
),
670+
None,
671+
None,
672+
),
673+
(
674+
"dd=s:2;o:rum;t.dm:-4;t.usr.id:baz:6:4",
675+
# sampling_priority_ts, other_propagated_tags, origin
676+
(
677+
2,
678+
{
679+
"_dd.p.dm": "-4",
680+
"_dd.p.usr.id": "baz:6:4",
681+
},
682+
"rum",
683+
),
684+
None,
655685
None,
656-
["received invalid tracestate header: 'dd=foo|bar:hi|l¢¢¢¢¢¢:"],
657-
ValueError,
658686
),
659687
],
660688
ids=[
@@ -669,6 +697,8 @@ def test_extract_traceparent(caplog, headers, expected_tuple, expected_logging,
669697
"tracestate_no_origin",
670698
"tracestate_invalid_dd_list_member",
671699
"tracestate_invalid_tracestate_char_outside_ascii_range_20-70",
700+
"tracestate_tilda_replaced_with_equals",
701+
"tracestate_colon_acceptable_char_in_value",
672702
],
673703
)
674704
def test_extract_tracestate(caplog, ts_string, expected_tuple, expected_logging, expected_exception):

tests/tracer/test_utils.py

Lines changed: 17 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -457,7 +457,7 @@ def _():
457457
["s:2", "o:synthetics", "t.unknown:baz64"],
458458
),
459459
( # for key replace ",", "=", and characters outside the ASCII range 0x20 to 0x7E with _
460-
# for value replace ",", ";", ":" and characters outside the ASCII range 0x20 to 0x7E with _
460+
# for value replace ",", ";", and characters outside the ASCII range 0x20 to 0x7E with _
461461
Context(
462462
trace_id=1234,
463463
sampling_priority=2,
@@ -466,10 +466,11 @@ def _():
466466
"_dd.p.unk": "-4",
467467
"_dd.p.unknown": "baz64",
468468
"_dd.p.¢": ";4",
469-
"_dd.p.u=,": "b:,¢a",
469+
# colons are allowed in tag values
470+
"_dd.p.u¢,": "b:,¢a",
470471
},
471472
),
472-
["s:2", "o:synthetics", "t.unk:-4", "t.unknown:baz64", "t._:_4", "t.u__:b___a"],
473+
["s:2", "o:synthetics", "t.unk:-4", "t.unknown:baz64", "t._:_4", "t.u__:b:__a"],
473474
),
474475
(
475476
Context(
@@ -483,6 +484,18 @@ def _():
483484
),
484485
["s:0", "o:synthetics", "t.unk:-4", "t.unknown:baz64"],
485486
),
487+
(
488+
Context(
489+
trace_id=1234,
490+
sampling_priority=2,
491+
dd_origin="syn=",
492+
meta={
493+
"_dd.p.unk": "-4~",
494+
"_dd.p.unknown": "baz64",
495+
},
496+
),
497+
["s:2", "o:syn~", "t.unk:-4_", "t.unknown:baz64"],
498+
),
486499
],
487500
ids=[
488501
"basic",
@@ -491,6 +504,7 @@ def _():
491504
"does_not_add_more_than_256_char",
492505
"char_replacement",
493506
"sampling_priority_0",
507+
"value_tilda_and_equals_sign_replacement",
494508
],
495509
)
496510
# since we are looping through a dict, we can't predict the order of some of the tags

0 commit comments

Comments
 (0)