Skip to content

Commit e3908f8

Browse files
authored
feat(asm): redact sensitive querystrings in http.url (#3980)
## Description By default query strings are not sent, unless `trace_query_string` is enabled. In case `trace_query_string` is enabled, sensitive query strings will be redacted to be sent in http.url tag. ## Checklist - [x] Title must conform to [conventional commit](https://github.com/conventional-changelog/commitlint/tree/master/%40commitlint/config-conventional). - [x] Add additional sections for `feat` and `fix` pull requests. - [x] Ensure tests are passing for affected code. - [ ] [Library documentation](https://github.com/DataDog/dd-trace-py/tree/1.x/docs) and/or [Datadog's documentation site](https://github.com/DataDog/documentation/) is updated. Link to doc PR in description. ## Motivation ASM will enable security analysis for the query strings in the backend, so query strings will be sent by default once sensitive query strings are redacted. ## Design This PR enables sensitive query strings redaction, next step will be enabling `trace_query_string` by default. ## Testing strategy - A test added checking with an example of a sensitive query string. - System tests for sensitive query string obfuscation. - Performance test to evaluate re.sub overhead. ## Reviewer Checklist - [ ] Title is accurate. - [ ] Description motivates each change. - [ ] No unnecessary changes were introduced in this PR. - [ ] PR cannot be broken up into smaller PRs. - [ ] Avoid breaking [API](https://ddtrace.readthedocs.io/en/stable/versioning.html#interfaces) changes unless absolutely necessary. - [ ] Tests provided or description of manual testing performed is included in the code or PR. - [ ] Release note has been added for fixes and features, or else `changelog/no-changelog` label added. - [ ] All relevant GitHub issues are correctly linked. - [ ] Backports are identified and tagged with Mergifyio. - [ ] Add to milestone.
1 parent 4f8ec5a commit e3908f8

File tree

20 files changed

+387
-17
lines changed

20 files changed

+387
-17
lines changed

ddtrace/contrib/aiohttp/patch.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -77,7 +77,7 @@ async def _traced_clientsession_request(aiohttp, pin, func, instance, args, kwar
7777
span,
7878
config.aiohttp_client,
7979
method=method,
80-
url=url_str,
80+
url=str(url),
8181
query=parsed_url.query,
8282
request_headers=headers,
8383
)

ddtrace/contrib/trace_utils.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@
2828
from ddtrace.internal.utils.cache import cached
2929
from ddtrace.internal.utils.formats import asbool
3030
from ddtrace.internal.utils.http import normalize_header_name
31+
from ddtrace.internal.utils.http import redact_url
3132
from ddtrace.internal.utils.http import strip_query_string
3233
import ddtrace.internal.utils.wrappers
3334
from ddtrace.propagation.http import HTTPPropagator
@@ -419,7 +420,10 @@ def set_http_meta(
419420
span._set_str_tag(http.METHOD, method)
420421

421422
if url is not None:
422-
span._set_str_tag(http.URL, url if integration_config.trace_query_string else strip_query_string(url))
423+
if integration_config.trace_query_string and not config.global_trace_query_string_disabled:
424+
span._set_str_tag(http.URL, redact_url(url, config._obfuscation_query_string_pattern, query))
425+
else:
426+
span._set_str_tag(http.URL, strip_query_string(url))
423427

424428
if status_code is not None:
425429
try:

ddtrace/internal/utils/http.py

Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,11 @@
11
from contextlib import contextmanager
2+
import re
23
from typing import Any
34
from typing import Callable
45
from typing import ContextManager
56
from typing import Generator
67
from typing import Optional
8+
from typing import Tuple
79
from typing import Union
810

911
from ddtrace.internal import compat
@@ -40,6 +42,60 @@ def strip_query_string(url):
4042
return h + fs + f
4143

4244

45+
def redact_query_string(query_string, query_string_obfuscation_pattern):
46+
# type: (str, Optional[re.Pattern]) -> Union[bytes, str]
47+
if query_string_obfuscation_pattern is None:
48+
return query_string
49+
50+
bytes_query = query_string if isinstance(query_string, bytes) else query_string.encode("utf-8")
51+
return query_string_obfuscation_pattern.sub(b"<redacted>", bytes_query)
52+
53+
54+
def redact_url(url, query_string_obfuscation_pattern, query_string=None):
55+
# type: (str, re.Pattern, Optional[str]) -> Union[str,bytes]
56+
57+
# Avoid further processing if obfuscation is disabled
58+
if query_string_obfuscation_pattern is None:
59+
return url
60+
61+
parts = compat.parse.urlparse(url)
62+
redacted_query = None
63+
64+
if query_string:
65+
redacted_query = redact_query_string(query_string, query_string_obfuscation_pattern)
66+
elif parts.query:
67+
redacted_query = redact_query_string(parts.query, query_string_obfuscation_pattern)
68+
69+
if redacted_query is not None and len(parts) >= 5:
70+
redacted_parts = parts[:4] + (redacted_query,) + parts[5:] # type: Tuple[Union[str, bytes], ...]
71+
bytes_redacted_parts = tuple(x if isinstance(x, bytes) else x.encode("utf-8") for x in redacted_parts)
72+
return urlunsplit(bytes_redacted_parts, url)
73+
74+
# If no obfuscation is performed, return original url
75+
return url
76+
77+
78+
def urlunsplit(components, original_url):
79+
# type: (Tuple[bytes, ...], str) -> bytes
80+
"""
81+
Adaptation from urlunsplit and urlunparse, using bytes components
82+
"""
83+
scheme, netloc, url, params, query, fragment = components
84+
if params:
85+
url = b"%s;%s" % (url, params)
86+
if netloc or (scheme and url[:2] != b"//"):
87+
if url and url[:1] != b"/":
88+
url = b"/" + url
89+
url = b"//%s%s" % ((netloc or b""), url)
90+
if scheme:
91+
url = b"%s:%s" % (scheme, url)
92+
if query or (original_url and original_url[-1] in ("?", b"?")):
93+
url = b"%s?%s" % (url, query)
94+
if fragment or (original_url and original_url[-1] in ("#", b"#")):
95+
url = b"%s#%s" % (url, fragment)
96+
return url
97+
98+
4399
def connector(url, **kwargs):
44100
# type: (str, Any) -> Connector
45101
"""Create a connector context manager for the given URL.

ddtrace/settings/config.py

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
from copy import deepcopy
22
import os
3+
import re
34
from typing import List
45
from typing import Optional
56
from typing import Tuple
@@ -19,6 +20,18 @@
1920
log = get_logger(__name__)
2021

2122

23+
DD_TRACE_OBFUSCATION_QUERY_STRING_PATTERN_DEFAULT = (
24+
r"(?i)(?:p(?:ass)?w(?:or)?d|pass(?:_?phrase)?|secret|(?:api_?|"
25+
r"private_?|public_?|access_?|secret_?)key(?:_?id)?|token|consumer_?(?:id|key|secret)|"
26+
r'sign(?:ed|ature)?|auth(?:entication|orization)?)(?:(?:\s|%20)*(?:=|%3D)[^&]+|(?:"|%22)'
27+
r'(?:\s|%20)*(?::|%3A)(?:\s|%20)*(?:"|%22)(?:%2[^2]|%[^2]|[^"%])+(?:"|%22))|bearer(?:\s|%20)'
28+
r"+[a-z0-9\._\-]|token(?::|%3A)[a-z0-9]{13}|gh[opsu]_[0-9a-zA-Z]{36}|ey[I-L](?:[\w=-]|%3D)+\.ey[I-L]"
29+
r"(?:[\w=-]|%3D)+(?:\.(?:[\w.+\/=-]|%3D|%2F|%2B)+)?|[\-]{5}BEGIN(?:[a-z\s]|%20)+"
30+
r"PRIVATE(?:\s|%20)KEY[\-]{5}[^\-]+[\-]{5}END(?:[a-z\s]|%20)+PRIVATE(?:\s|%20)KEY|"
31+
r"ssh-rsa(?:\s|%20)*(?:[a-z0-9\/\.+]|%2F|%5C|%2B){100,}"
32+
)
33+
34+
2235
def _parse_propagation_styles(name, default):
2336
# type: (str, str) -> set[str]
2437
"""Helper to parse http propagation extract/inject styles via env variables.
@@ -212,6 +225,20 @@ def __init__(self):
212225
)
213226
self._appsec_enabled = asbool(os.getenv("DD_APPSEC_ENABLED", False))
214227

228+
dd_trace_obfuscation_query_string_pattern = os.getenv(
229+
"DD_TRACE_OBFUSCATION_QUERY_STRING_PATTERN", DD_TRACE_OBFUSCATION_QUERY_STRING_PATTERN_DEFAULT
230+
)
231+
self.global_trace_query_string_disabled = False
232+
self._obfuscation_query_string_pattern = None
233+
if dd_trace_obfuscation_query_string_pattern != "":
234+
try:
235+
self._obfuscation_query_string_pattern = re.compile(
236+
dd_trace_obfuscation_query_string_pattern.encode("ascii")
237+
)
238+
except Exception:
239+
log.warning("Invalid obfuscation pattern, disabling query string tracing")
240+
self.global_trace_query_string_disabled = True
241+
215242
def __getattr__(self, name):
216243
if name not in self._config:
217244
self._config[name] = IntegrationConfig(self, name)

docs/advanced_usage.rst

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -373,6 +373,14 @@ Examples::
373373
# Integration level config, e.g. 'falcon'
374374
config.falcon.http.trace_query_string = True
375375

376+
The sensitive query strings (e.g: token, password) are obfuscated by default.
377+
378+
It is possible to configure the obfuscation regexp by setting the ``DD_TRACE_OBFUSCATION_QUERY_STRING_PATTERN`` environment variable.
379+
380+
To disable query string obfuscation, set the ``DD_TRACE_OBFUSCATION_QUERY_STRING_PATTERN`` environment variable to empty string ("")
381+
382+
If the ``DD_TRACE_OBFUSCATION_QUERY_STRING_PATTERN`` environment variable is set to an invalid regexp, the query strings will not be traced.
383+
376384
.. _http-headers-tracing:
377385

378386
Headers tracing

docs/configuration.rst

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -200,6 +200,12 @@ below:
200200
- The trace API version to use when sending traces to the Datadog agent.
201201
Currently, the supported versions are: ``v0.3``, ``v0.4`` and ``v0.5``.
202202

203+
.. _dd-trace-obfuscation-query-string-pattern:
204+
* - ``DD_TRACE_OBFUSCATION_QUERY_STRING_PATTERN``
205+
- String
206+
- ``(?i)(?:p(?:ass)?w(?:or)?d|pass(?:_?phrase)?|secret|(?:api_?|private_?|public_?|access_?|secret_?)key(?:_?id)?|token|consumer_?(?:id|key|secret)|sign(?:ed|ature)?|auth(?:entication|orization)?)(?:(?:\s|%20)*(?:=|%3D)[^&]+|(?:"|%22)(?:\s|%20)*(?::|%3A)(?:\s|%20)*(?:"|%22)(?:%2[^2]|%[^2]|[^"%])+(?:"|%22))|bearer(?:\s|%20)+[a-z0-9\._\-]|token(?::|%3A)[a-z0-9]{13}|gh[opsu]_[0-9a-zA-Z]{36}|ey[I-L](?:[\w=-]|%3D)+\.ey[I-L](?:[\w=-]|%3D)+(?:\.(?:[\w.+\/=-]|%3D|%2F|%2B)+)?|[\-]{5}BEGIN(?:[a-z\s]|%20)+PRIVATE(?:\s|%20)KEY[\-]{5}[^\-]+[\-]{5}END(?:[a-z\s]|%20)+PRIVATE(?:\s|%20)KEY|ssh-rsa(?:\s|%20)*(?:[a-z0-9\/\.+]|%2F|%5C|%2B){100,}.``
207+
- A regexp to redact sensitive query strings. Obfuscation disabled if set to empty string
208+
203209
.. _dd-trace-propagation-style-extract:
204210
* - ``DD_TRACE_PROPAGATION_STYLE_EXTRACT``
205211
- String
Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
---
2+
features:
3+
- |
4+
ASM: Redact sensitive query strings if sent in http.url.

tests/contrib/asgi/test_asgi.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -366,14 +366,14 @@ async def test_multiple_requests(tracer, test_spans):
366366
assert r1_span.name == "asgi.request"
367367
assert r1_span.span_type == "web"
368368
assert r1_span.get_tag("http.method") == "GET"
369-
assert r1_span.get_tag("http.url") == "http://testserver/"
369+
assert r1_span.get_tag("http.url") == "http://testserver/?sleep=true"
370370
assert r1_span.get_tag("http.query.string") == "sleep=true"
371371

372372
r2_span = spans[0][0]
373373
assert r2_span.name == "asgi.request"
374374
assert r2_span.span_type == "web"
375375
assert r2_span.get_tag("http.method") == "GET"
376-
assert r2_span.get_tag("http.url") == "http://testserver/"
376+
assert r2_span.get_tag("http.url") == "http://testserver/?sleep=true"
377377
assert r2_span.get_tag("http.query.string") == "sleep=true"
378378

379379

tests/contrib/bottle/test.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -67,10 +67,11 @@ def hi(name):
6767
assert s.resource == "GET /hi/<name>"
6868
assert_span_http_status_code(s, 200)
6969
assert s.get_tag("http.method") == "GET"
70-
assert s.get_tag(http.URL) == "http://localhost:80/hi/dougie"
7170
if ddtrace.config.bottle.trace_query_string:
71+
assert s.get_tag(http.URL) == "http://localhost:80/hi/dougie" + fqs
7272
assert s.get_tag(http.QUERY_STRING) == query_string
7373
else:
74+
assert s.get_tag(http.URL) == "http://localhost:80/hi/dougie"
7475
assert http.QUERY_STRING not in s.get_tags()
7576

7677
def test_query_string(self):

tests/contrib/fastapi/test_fastapi.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -172,7 +172,7 @@ def test_read_item_query_string(client, tracer, test_spans):
172172
assert request_span.resource == "GET /items/{item_id}"
173173
assert request_span.error == 0
174174
assert request_span.get_tag("http.method") == "GET"
175-
assert request_span.get_tag("http.url") == "http://testserver/items/foo"
175+
assert request_span.get_tag("http.url") == "http://testserver/items/foo?q=query"
176176
assert request_span.get_tag("http.status_code") == "200"
177177
assert request_span.get_tag("http.query.string") == "q=query"
178178

@@ -195,7 +195,7 @@ def test_200_multi_query_string(client, tracer, test_spans):
195195
assert request_span.resource == "GET /items/{item_id}"
196196
assert request_span.error == 0
197197
assert request_span.get_tag("http.method") == "GET"
198-
assert request_span.get_tag("http.url") == "http://testserver/items/foo"
198+
assert request_span.get_tag("http.url") == "http://testserver/items/foo?name=Foo&q=query"
199199
assert request_span.get_tag("http.status_code") == "200"
200200
assert request_span.get_tag("http.query.string") == "name=Foo&q=query"
201201

0 commit comments

Comments
 (0)