Skip to content

Commit 0067fda

Browse files
mergify[bot] and nizox
authored
Tag traces with HTTP headers from DD_TRACE_HEADER_TAGS (#2782) (#2829)
* Tag traces with HTTP headers from DD_TRACE_HEADER_TAGS. Value must be either comma or space separated. e.g. Host:http.host,User-Agent:http.user_agent or referer:http.referer Content-Type:http.content_type. * Better documentation * Fix urllib3 tests * Fix urllib3 tests * Update documentation * Keep the header_is_traced method Co-authored-by: mergify[bot] <37929162+mergify[bot]@users.noreply.github.com> Co-authored-by: Brett Langdon <[email protected]> Co-authored-by: Kyle Verhoog <[email protected]> (cherry picked from commit fbb64a3) Co-authored-by: Nicolas Vivet <[email protected]>
1 parent 535e0f8 commit 0067fda

File tree

11 files changed

+84
-28
lines changed

11 files changed

+84
-28
lines changed

ddtrace/contrib/trace_utils.py

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -97,10 +97,11 @@ def _store_headers(headers, span, integration_config, request_or_response):
9797
return
9898

9999
for header_name, header_value in headers.items():
100-
if not integration_config.header_is_traced(header_name):
100+
tag_name = integration_config._header_tag_name(header_name)
101+
if tag_name is None:
101102
continue
102-
tag_name = _normalize_tag_name(request_or_response, header_name)
103-
span.set_tag(tag_name, header_value)
103+
# An empty tag defaults to a http.<request or response>.headers.<header name> tag
104+
span.set_tag(tag_name or _normalize_tag_name(request_or_response, header_name), header_value)
104105

105106

106107
def _store_request_headers(headers, span, integration_config):

ddtrace/profiling/collector/stack.pyx

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@ from ddtrace.profiling.collector import _traceback
2121
from ddtrace.utils import attr as attr_utils
2222
from ddtrace.utils import formats
2323

24+
2425
try:
2526
import gevent.thread
2627
except ImportError:

ddtrace/settings/config.py

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
from copy import deepcopy
22
import os
33
from typing import List
4+
from typing import Optional
45
from typing import Tuple
56

67
from ddtrace.utils.cache import cachedmethod
@@ -106,7 +107,10 @@ def is_error_code(self, status_code):
106107
def __init__(self):
107108
# use a dict as underlying storing mechanism
108109
self._config = {}
109-
self.http = HttpConfig()
110+
111+
header_tags = parse_tags_str(get_env("trace", "header_tags") or "")
112+
self.http = HttpConfig(header_tags=header_tags)
113+
110114
# Master switch for turning on and off trace search by default
111115
# this weird invocation of get_env is meant to read the DD_ANALYTICS_ENABLED
112116
# legacy environment variable. It should be removed in the future
@@ -203,6 +207,7 @@ def trace_headers(self, whitelist):
203207
return self
204208

205209
def header_is_traced(self, header_name):
210+
# type: (str) -> bool
206211
"""
207212
Returns whether or not the current header should be traced.
208213
:param header_name: the header name
@@ -211,6 +216,10 @@ def header_is_traced(self, header_name):
211216
"""
212217
return self.http.header_is_traced(header_name)
213218

219+
def _header_tag_name(self, header_name):
220+
# type: (str) -> Optional[str]
221+
return self.http._header_tag_name(header_name)
222+
214223
def _get_service(self, default=None):
215224
"""
216225
Returns the globally configured service.

ddtrace/settings/http.py

Lines changed: 25 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,10 @@
11
from typing import List
2+
from typing import Mapping
23
from typing import Optional
3-
from typing import Set
44
from typing import Union
55

6+
import six
7+
68
from ..internal.logger import get_logger
79
from ..utils.cache import cachedmethod
810
from ..utils.http import normalize_header_name
@@ -17,15 +19,27 @@ class HttpConfig(object):
1719
related to the http context.
1820
"""
1921

20-
def __init__(self):
21-
# type: () -> None
22-
self._whitelist_headers = set() # type: Set[str]
22+
def __init__(self, header_tags=None):
23+
# type: (Optional[Mapping[str, str]]) -> None
24+
self._header_tags = {normalize_header_name(k): v for k, v in header_tags.items()} if header_tags else {}
2325
self.trace_query_string = None
2426

27+
@cachedmethod()
28+
def _header_tag_name(self, header_name):
29+
# type: (str) -> Optional[str]
30+
if not self._header_tags:
31+
return None
32+
33+
normalized_header_name = normalize_header_name(header_name)
34+
log.debug(
35+
"Checking header '%s' tracing in whitelist %s", normalized_header_name, six.viewkeys(self._header_tags)
36+
)
37+
return self._header_tags.get(normalized_header_name)
38+
2539
@property
2640
def is_header_tracing_configured(self):
2741
# type: () -> bool
28-
return len(self._whitelist_headers) > 0
42+
return len(self._header_tags) > 0
2943

3044
def trace_headers(self, whitelist):
3145
# type: (Union[List[str], str]) -> Optional[HttpConfig]
@@ -44,14 +58,15 @@ def trace_headers(self, whitelist):
4458
normalized_header_name = normalize_header_name(whitelist_entry)
4559
if not normalized_header_name:
4660
continue
47-
self._whitelist_headers.add(normalized_header_name)
61+
# Empty tag is replaced by the default tag for this header:
62+
# Host on the request defaults to http.request.headers.host
63+
self._header_tags.setdefault(normalized_header_name, "")
4864

4965
# Mypy can't catch cached method's invalidate()
50-
self.header_is_traced.invalidate() # type: ignore[attr-defined]
66+
self._header_tag_name.invalidate() # type: ignore[attr-defined]
5167

5268
return self
5369

54-
@cachedmethod()
5570
def header_is_traced(self, header_name):
5671
# type: (str) -> bool
5772
"""
@@ -60,14 +75,9 @@ def header_is_traced(self, header_name):
6075
:type header_name: str
6176
:rtype: bool
6277
"""
63-
if not self._whitelist_headers:
64-
return False
65-
66-
normalized_header_name = normalize_header_name(header_name)
67-
log.debug("Checking header '%s' tracing in whitelist %s", normalized_header_name, self._whitelist_headers)
68-
return normalized_header_name in self._whitelist_headers
78+
return self._header_tag_name(header_name) is not None
6979

7080
def __repr__(self):
7181
return "<{} traced_headers={} trace_query_string={}>".format(
72-
self.__class__.__name__, self._whitelist_headers, self.trace_query_string
82+
self.__class__.__name__, self._header_tags.keys(), self.trace_query_string
7383
)

ddtrace/settings/integration.py

Lines changed: 9 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -87,17 +87,21 @@ def is_header_tracing_configured(self):
8787
return self.http.is_header_tracing_configured or self.global_config.http.is_header_tracing_configured
8888

8989
def header_is_traced(self, header_name):
90+
# type: (str) -> bool
9091
"""
9192
Returns whether or not the current header should be traced.
9293
:param header_name: the header name
9394
:type header_name: str
9495
:rtype: bool
9596
"""
96-
return (
97-
self.http.header_is_traced(header_name)
98-
if self.http.is_header_tracing_configured
99-
else self.global_config.header_is_traced(header_name)
100-
)
97+
return self._header_tag_name(header_name) is not None
98+
99+
def _header_tag_name(self, header_name):
100+
# type: (str) -> Optional[str]
101+
tag_name = self.http._header_tag_name(header_name)
102+
if tag_name is None:
103+
return self.global_config._header_tag_name(header_name)
104+
return tag_name
101105

102106
def _is_analytics_enabled(self, use_global_config):
103107
# DEV: analytics flag can be None which should not be taken as

docs/advanced_usage.rst

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -387,7 +387,9 @@ Headers tracing
387387

388388
For a selected set of integrations, it is possible to store http headers from both requests and responses in tags.
389389

390-
Configuration can be provided both at the global level and at the integration level.
390+
The recommended method is to use the ``DD_TRACE_HEADER_TAGS`` environment variable.
391+
392+
Alternatively, configuration can be provided both at the global level and at the integration level in your application code.
391393

392394
Examples::
393395

docs/configuration.rst

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -142,6 +142,13 @@ below:
142142
- 1.0
143143
- A float, f, 0.0 <= f <= 1.0. f*100% of traces will be sampled.
144144

145+
.. _dd-trace-header-tags:
146+
* - ``DD_TRACE_HEADER_TAGS``
147+
- String
148+
-
149+
- A map of case-insensitive header keys to tag names. Automatically applies matching header values as tags on root spans.
150+
For example, ``User-Agent:http.useragent,content-type:http.content_type``.
151+
145152
.. _dd-profiling-enabled:
146153
* - ``DD_PROFILING_ENABLED``
147154
- Boolean
Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
---
2+
features:
3+
- |
4+
Tag traces with HTTP headers specified on the ``DD_TRACE_HEADER_TAGS`` environment variable.
5+
Value must be either comma or space separated. e.g. ``Host:http.host,User-Agent:http.user_agent``
6+
or ``referer:http.referer Content-Type:http.content_type``.

scripts/profiles/django-simple/run.sh

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,9 @@ export DJANGO_ALLOWED_HOSTS="127.0.0.1"
3030
export DJANGO_SECRET_KEY="SECRET_KEY"
3131
export DATABASE_URL="sqlite:///django.db"
3232

33+
# Tag traces with HTTP headers to benchmark the related code
34+
export DD_TRACE_HEADER_TAGS="User-Agent:http.user_agent,Referer:http.referer,Content-Type:http.content_type,Etag:http.etag"
35+
3336
# Baseline
3437
pushd ${PREFIX}/trace-examples/python/django/django-simple
3538
gunicorn config.wsgi --pid ${PREFIX}/gunicorn.pid > /dev/null &

tests/contrib/urllib3/test_urllib3.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -101,7 +101,7 @@ def test_args_kwargs(self):
101101

102102
for args, kwargs in inputs:
103103

104-
with self.override_http_config("urllib3", {"_whitelist_headers": set()}):
104+
with self.override_http_config("urllib3", {"_header_tags": dict()}):
105105
config.urllib3.http.trace_headers(["accept"])
106106
pool = urllib3.connectionpool.HTTPConnectionPool(HOST, PORT)
107107
out = pool.urlopen(*args, **kwargs)
@@ -314,7 +314,7 @@ def test_request_and_response_headers(self):
314314
assert s.get_tag("http.response.headers.access-control-allow-origin") is None
315315

316316
# Enabled when explicitly configured
317-
with self.override_http_config("urllib3", {"_whitelist_headers": set()}):
317+
with self.override_http_config("urllib3", {"_header_tags": dict()}):
318318
config.urllib3.http.trace_headers(["my-header", "access-control-allow-origin"])
319319
self.http.request("GET", URL_200, headers={"my-header": "my_value"})
320320
spans = self.pop_spans()

0 commit comments

Comments
 (0)