Skip to content

Commit dc806e2

Browse files
Propagate context internal tags through http headers (#3045)
* add tagset module * Allow encoding a subclass of dict * Propagate context meta through http headers * Add HTTPPropagator.extract benchmark * fix typing issues * expand benchmark test cases * fix flake8 issue * fix type casting * use ensure_str instead of ensure_text * add tests for invalid tags * add scaffolding for http inject benchmarks * replace existing benchmarks with new inject/extract scenarios * fix benchmark scenario * disallow leading commas, remove runtime type check * add warning logs * add test case for handling of unicode keys and values * remove unused imports * Update benchmarks/http_propagation_extract/config.yaml * do not propagate on any error * Update ddtrace/propagation/http.py * fix tag assertion * do not encode tags when we previously had an error Co-authored-by: mergify[bot] <37929162+mergify[bot]@users.noreply.github.com>
1 parent d6d1705 commit dc806e2

File tree

10 files changed

+448
-75
lines changed

10 files changed

+448
-75
lines changed
Lines changed: 109 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,109 @@
1+
# No headers provided
2+
empty_headers: &default_values
3+
headers: "{}"
4+
extra_headers: 0
5+
wsgi_style: False
6+
7+
# 20 headers, but none that we expect
8+
medium_header_no_matches: &medium_header_no_matches
9+
headers: "{}"
10+
extra_headers: 20
11+
wsgi_style: False
12+
13+
# 100 headers, but none that we expect
14+
large_header_no_matches: &large_header_no_matches
15+
headers: "{}"
16+
extra_headers: 100
17+
wsgi_style: False
18+
19+
# Only trace id/span id/priority
20+
valid_headers_basic: &valid_headers_basic
21+
<<: *default_values
22+
headers: |
23+
{"x-datadog-trace-id": "1234", "x-datadog-span-id": "5678", "x-datadog-sampling-priority": "1"}
24+
25+
# All possible headers we expect
26+
valid_headers_all: &valid_headers_all
27+
<<: *default_values
28+
headers: |
29+
{"x-datadog-trace-id": "1234", "x-datadog-span-id": "5678", "x-datadog-sampling-priority": "1", "x-datadog-origin": "synthetics", "x-datadog-tags": "_dd.p.tag=value,_dd.p.some_long_key=some_long_value"}
30+
31+
# All valid/possible headers but 20 additional unrelated headers
32+
medium_valid_headers_all: &medium_valid_headers_all
33+
<<: *valid_headers_all
34+
extra_headers: 20
35+
36+
# All valid/possible headers but 100 additional unrelated headers
37+
large_valid_headers_all: &large_valid_headers_all
38+
<<: *valid_headers_all
39+
extra_headers: 100
40+
41+
# x-datadog-trace-id is invalid
42+
invalid_trace_id_header: &invalid_trace_id_header
43+
<<: *default_values
44+
headers: |
45+
{"x-datadog-trace-id": "trace_id", "x-datadog-span-id": "5678", "x-datadog-sampling-priority": "1", "x-datadog-origin": "synthetics", "x-datadog-tags": "_dd.p.tag=value,_dd.p.some_long_key=some_long_value"}
46+
47+
# x-datadog-span-id is invalid
48+
invalid_span_id_header: &invalid_span_id_header
49+
<<: *default_values
50+
headers: |
51+
{"x-datadog-trace-id": "1234", "x-datadog-span-id": "span_id", "x-datadog-sampling-priority": "1", "x-datadog-origin": "synthetics", "x-datadog-tags": "_dd.p.tag=value,_dd.p.some_long_key=some_long_value"}
52+
53+
# x-datadog-sampling-priority is invalid
54+
invalid_priority_header: &invalid_priority_header
55+
<<: *default_values
56+
headers: |
57+
{"x-datadog-trace-id": "1234", "x-datadog-span-id": "5678", "x-datadog-sampling-priority": "priority", "x-datadog-origin": "synthetics", "x-datadog-tags": "_dd.p.tag=value,_dd.p.some_long_key=some_long_value"}
58+
59+
# x-datadog-tags is invalid
60+
invalid_tags_header: &invalid_tags_header
61+
<<: *default_values
62+
headers: |
63+
{"x-datadog-trace-id": "1234", "x-datadog-span-id": "5678", "x-datadog-sampling-priority": "1", "x-datadog-origin": "synthetics", "x-datadog-tags": "_dd.p.tag=value,_dd.p.some_long_key=some_long_value,key=,=value,"}
64+
65+
66+
# Same scenarios as above but with HTTP_WSGI_STYLE_HEADERS
67+
wsgi_empty_headers:
68+
<<: *default_values
69+
wsgi_style: True
70+
71+
wsgi_medium_header_no_matches:
72+
<<: *medium_header_no_matches
73+
wsgi_style: True
74+
75+
wsgi_large_header_no_matches:
76+
<<: *large_header_no_matches
77+
wsgi_style: True
78+
79+
wsgi_valid_headers_basic:
80+
<<: *valid_headers_basic
81+
wsgi_style: True
82+
83+
wsgi_valid_headers_all:
84+
<<: *valid_headers_all
85+
wsgi_style: True
86+
87+
wsgi_medium_valid_headers_all:
88+
<<: *medium_valid_headers_all
89+
wsgi_style: True
90+
91+
wsgi_large_valid_headers_all:
92+
<<: *large_valid_headers_all
93+
wsgi_style: True
94+
95+
wsgi_invalid_trace_id_header:
96+
<<: *invalid_trace_id_header
97+
wsgi_style: True
98+
99+
# WSGI-style variant of invalid_span_id_header. It must merge the
# invalid_span_id_header anchor (not default_values) so that this scenario
# actually exercises an invalid x-datadog-span-id, mirroring every other
# wsgi_* entry which merges its corresponding non-WSGI anchor.
wsgi_invalid_span_id_header:
  <<: *invalid_span_id_header
  wsgi_style: True
102+
103+
wsgi_invalid_priority_header:
104+
<<: *invalid_priority_header
105+
wsgi_style: True
106+
107+
wsgi_invalid_tags_header:
108+
<<: *invalid_tags_header
109+
wsgi_style: True
Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
import json
2+
3+
import bm
4+
5+
from ddtrace.propagation import http
6+
from ddtrace.propagation import utils
7+
8+
9+
class HTTPPropagationExtract(bm.Scenario):
    """Benchmark scenario for ``HTTPPropagator.extract``.

    Builds a header mapping from the configured JSON blob (optionally
    rewritten into WSGI style and padded with unrelated headers), then
    times repeated ``extract`` calls against that mapping.
    """

    headers = bm.var(type=str)
    extra_headers = bm.var(type=int)
    wsgi_style = bm.var(type=bool)

    def generate_headers(self):
        # Start from the scenario's configured headers (a JSON object).
        parsed = json.loads(self.headers)
        if self.wsgi_style:
            # Rewrite every header name the way a WSGI server would present it.
            parsed = dict(
                (utils.get_wsgi_header(name), value) for name, value in parsed.items()
            )

        # Pad with unrelated headers that the propagator has to skip over.
        for idx in range(self.extra_headers):
            name = "x-test-header-{}".format(idx)
            if self.wsgi_style:
                name = utils.get_wsgi_header(name)
            parsed[name] = str(idx)

        return parsed

    def run(self):
        # Header generation happens once, outside the timed loop, so only
        # extract() itself is measured.
        headers = self.generate_headers()

        def _(loops):
            for _ in range(loops):
                http.HTTPPropagator.extract(headers)

        yield _
Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,40 @@
1+
ids_only: &defaults
2+
sampling_priority: ""
3+
dd_origin: ""
4+
meta: ""
5+
6+
with_sampling_priority:
7+
<<: *defaults
8+
sampling_priority: "1"
9+
10+
with_dd_origin:
11+
<<: *defaults
12+
dd_origin: "synthetics"
13+
14+
with_priority_and_origin:
15+
<<: *defaults
16+
sampling_priority: "1"
17+
dd_origin: "synthetics"
18+
19+
with_tags:
20+
<<: *defaults
21+
meta: |
22+
{"_dd.p.test": "value", "_dd.p.sample": "value", "will.be": "skipped"}
23+
24+
with_all:
25+
<<: *defaults
26+
sampling_priority: "1"
27+
dd_origin: "synthetics"
28+
meta: |
29+
{"_dd.p.test": "value", "_dd.p.sample": "value", "will.be": "skipped"}
30+
31+
with_tags_invalid:
32+
<<: *defaults
33+
meta: |
34+
{"_dd.p.test": "value", "_dd.p.test=": "=value,"}
35+
36+
with_tags_max_size:
37+
<<: *defaults
38+
# The limit is 512, so one of these can be encoded, but not both
39+
meta: |
40+
{"_dd.p.test": "______________________________________________________________________________________________________________________________________________________________________________________________________________________________________________________________________________________________________________________________________________________________", "_dd.p.sample": "______________________________________________________________________________________________________________________________________________________________________________________________________________________________________________________________________________________________________________________________________________________________"}
Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
import json
2+
3+
import bm
4+
5+
from ddtrace.context import Context
6+
from ddtrace.propagation import http
7+
8+
9+
class HTTPPropagationInject(bm.Scenario):
    """Benchmark scenario for ``HTTPPropagator.inject``.

    Builds a single ``Context`` from the configured scenario values and
    times repeated ``inject`` calls into a fresh, empty carrier dict.
    """

    sampling_priority = bm.var(type=str)
    dd_origin = bm.var(type=str)
    meta = bm.var(type=str)

    def run(self):
        # An empty string means "not set" for each optional input.
        priority = int(self.sampling_priority) if self.sampling_priority != "" else None
        origin = self.dd_origin if self.dd_origin else None
        meta = json.loads(self.meta) if self.meta else None

        ctx = Context(
            trace_id=8336172473188639332,
            span_id=6804240797025004118,
            sampling_priority=priority,
            dd_origin=origin,
            meta=meta,
        )

        def _(loops):
            for _ in range(loops):
                # Just pass in a new/empty dict, we don't care about the result
                http.HTTPPropagator.inject(ctx, {})

        yield _

ddtrace/internal/_tagset.pyx

Lines changed: 25 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -135,6 +135,27 @@ cpdef dict decode_tagset_string(str tagset):
135135

136136
return res
137137

138+
cdef bint _key_is_valid(str key):
    """Return 1 when ``key`` is non-empty and every character is an allowed key character."""
    # An empty (or None) key is never valid.
    if not key:
        return 0

    # Reject the key as soon as a single disallowed character is found.
    for ch in key:
        if not is_valid_key_char(ord(ch)):
            return 0
    return 1
147+
148+
149+
cdef bint _value_is_valid(str value):
150+
"""Helper to ensure a values's characters are all valid"""
151+
if not value:
152+
return 0
153+
154+
for c in value:
155+
if not is_valid_key_char(ord(c)):
156+
return 0
157+
return 1
158+
138159

139160
cpdef str encode_tagset_values(object values, int max_size=512):
140161
# type: (Dict[str, str], int) -> str
@@ -164,20 +185,10 @@ cpdef str encode_tagset_values(object values, int max_size=512):
164185
key = key.strip(" ")
165186
value = value.strip(" ")
166187

167-
if not key:
168-
raise TagsetEncodeError("Key cannot be empty")
169-
if not value:
170-
raise TagsetEncodeError("Value cannot be empty")
171-
172-
# Disallow " ", ",", and "=" in keys
173-
for c in (" ", ",", "="):
174-
if c in key:
175-
raise TagsetEncodeError("Unexpected {!r} in key {!r}".format(c, key))
176-
177-
# Disallow "," and "=" in keys
178-
for c in (",", "="):
179-
if c in value:
180-
raise TagsetEncodeError("Unexpected {!r} in value {!r}".format(c, value))
188+
if not _key_is_valid(key):
189+
raise TagsetEncodeError("Key is not valid: {!r}".format(key))
190+
if not _value_is_valid(value):
191+
raise TagsetEncodeError("Value is not valid: {!r}".format(value))
181192

182193
encoded = "{}={}".format(key, value)
183194
# Prefix every item except the first with `,` for separator

ddtrace/internal/compat.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,7 @@
4848
reload_module = six.moves.reload_module
4949

5050
ensure_text = six.ensure_text
51+
ensure_str = six.ensure_str
5152
stringify = six.text_type
5253
string_type = six.string_types[0]
5354
binary_type = six.binary_type

ddtrace/propagation/http.py

Lines changed: 57 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,16 @@
11
from typing import Dict
22
from typing import FrozenSet
33
from typing import Optional
4+
from typing import Union
5+
from typing import cast
46

57
from ..context import Context
8+
from ..internal._tagset import TagsetDecodeError
9+
from ..internal._tagset import TagsetEncodeError
10+
from ..internal._tagset import TagsetMaxSizeError
11+
from ..internal._tagset import decode_tagset_string
12+
from ..internal._tagset import encode_tagset_values
13+
from ..internal.compat import ensure_str
614
from ..internal.logger import get_logger
715
from .utils import get_wsgi_header
816

@@ -15,6 +23,7 @@
1523
HTTP_HEADER_PARENT_ID = "x-datadog-parent-id"
1624
HTTP_HEADER_SAMPLING_PRIORITY = "x-datadog-sampling-priority"
1725
HTTP_HEADER_ORIGIN = "x-datadog-origin"
26+
HTTP_HEADER_TAGS = "x-datadog-tags"
1827

1928

2029
# Note that due to WSGI spec we have to also check for uppercased and prefixed
@@ -25,6 +34,7 @@
2534
[HTTP_HEADER_SAMPLING_PRIORITY, get_wsgi_header(HTTP_HEADER_SAMPLING_PRIORITY).lower()]
2635
)
2736
POSSIBLE_HTTP_HEADER_ORIGIN = frozenset([HTTP_HEADER_ORIGIN, get_wsgi_header(HTTP_HEADER_ORIGIN).lower()])
37+
POSSIBLE_HTTP_HEADER_TAGS = frozenset([HTTP_HEADER_TAGS, get_wsgi_header(HTTP_HEADER_TAGS).lower()])
2838

2939

3040
class HTTPPropagator(object):
@@ -60,6 +70,34 @@ def parent_call():
6070
if span_context.dd_origin is not None:
6171
headers[HTTP_HEADER_ORIGIN] = str(span_context.dd_origin)
6272

73+
# Do not try to encode tags if we have already tried and received an error
74+
if "_dd.propagation_error" in span_context._meta:
75+
return
76+
77+
# Only propagate tags that start with `_dd.p.`
78+
tags_to_encode = {} # type: Dict[str, str]
79+
for key, value in span_context._meta.items():
80+
# DEV: encoding will fail if the key or value are not `str`
81+
key = ensure_str(key)
82+
if key.startswith("_dd.p."):
83+
tags_to_encode[key] = ensure_str(value)
84+
85+
if tags_to_encode:
86+
encoded_tags = None
87+
88+
try:
89+
encoded_tags = encode_tagset_values(tags_to_encode)
90+
except TagsetMaxSizeError:
91+
# We hit the max size allowed, add a tag to the context to indicate this happened
92+
span_context._meta["_dd.propagation_error"] = "max_size"
93+
log.warning("failed to encode x-datadog-tags", exc_info=True)
94+
except TagsetEncodeError:
95+
# We hit an encoding error, add a tag to the context to indicate this happened
96+
span_context._meta["_dd.propagation_error"] = "encoding_error"
97+
log.warning("failed to encode x-datadog-tags", exc_info=True)
98+
if encoded_tags:
99+
headers[HTTP_HEADER_TAGS] = encoded_tags
100+
63101
@staticmethod
64102
def _extract_header_value(possible_header_names, headers, default=None):
65103
# type: (FrozenSet[str], Dict[str, str], Optional[str]) -> Optional[str]
@@ -117,6 +155,19 @@ def my_controller(url, headers):
117155
POSSIBLE_HTTP_HEADER_ORIGIN,
118156
normalized_headers,
119157
)
158+
meta = None
159+
tags_value = HTTPPropagator._extract_header_value(
160+
POSSIBLE_HTTP_HEADER_TAGS,
161+
normalized_headers,
162+
default="",
163+
)
164+
if tags_value:
165+
# Do not fail if the tags are malformed
166+
try:
167+
# We get a Dict[str, str], but need it to be Dict[Union[str, bytes], str] (e.g. _MetaDictType)
168+
meta = cast(Dict[Union[str, bytes], str], decode_tagset_string(tags_value))
169+
except TagsetDecodeError:
170+
log.debug("failed to decode x-datadog-tags: %r", tags_value, exc_info=True)
120171

121172
# Try to parse values into their expected types
122173
try:
@@ -131,15 +182,20 @@ def my_controller(url, headers):
131182
span_id=int(parent_span_id) or None, # type: ignore[arg-type]
132183
sampling_priority=sampling_priority, # type: ignore[arg-type]
133184
dd_origin=origin,
185+
meta=meta,
134186
)
135187
# If headers are invalid and cannot be parsed, return a new context and log the issue.
136188
except (TypeError, ValueError):
137189
log.debug(
138-
"received invalid x-datadog-* headers, trace-id: %r, parent-id: %r, priority: %r, origin: %r",
190+
(
191+
"received invalid x-datadog-* headers, "
192+
"trace-id: %r, parent-id: %r, priority: %r, origin: %r, tags: %r"
193+
),
139194
trace_id,
140195
parent_span_id,
141196
sampling_priority,
142197
origin,
198+
tags_value,
143199
)
144200
return Context()
145201
except Exception:

0 commit comments

Comments
 (0)