Skip to content

Commit 762a5f7

Browse files
authored
fix(elasticsearch): omit large body tag values [backport #5229 to 1.9] (#5256)
The elasticsearch body tag value in practice can be very large. Since the value is not truncated, it can easily result in traces exceeding the max size limit which results in dropped traces. The proposed solution is to omit the value but include a message that notifies that the body has been omitted due to size. This is because obfuscation logic lives in the Agent and so the body cannot be truncated as sensitive data would be leaked. No body tag is better than no trace. Ideally the obfuscation logic should exist in the client so that the body can be truncated.
1 parent e6029d5 commit 762a5f7

File tree

5 files changed

+49
-1
lines changed

5 files changed

+49
-1
lines changed

ddtrace/_tracing/__init__.py

Whitespace-only changes.

ddtrace/_tracing/_limits.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
"""
2+
Limits for trace data.
3+
"""
4+
5+
MAX_SPAN_META_KEY_LEN = 200
6+
MAX_SPAN_META_VALUE_LEN = 25000

ddtrace/contrib/elasticsearch/patch.py

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
from importlib import import_module
22

33
from ddtrace import config
4+
from ddtrace._tracing import _limits
45
from ddtrace.contrib.trace_utils import ext_service
56
from ddtrace.internal.constants import COMPONENT
67
from ddtrace.vendor.wrapt import wrap_function_wrapper as _w
@@ -95,7 +96,19 @@ def _perform_request(func, instance, args, kwargs):
9596
span.set_tag_str(http.QUERY_STRING, encoded_params)
9697

9798
if method in ["GET", "POST"]:
98-
span.set_tag_str(metadata.BODY, instance.serializer.dumps(body))
99+
ser_body = instance.serializer.dumps(body)
100+
# Elasticsearch request bodies can be very large resulting in traces being too large
101+
# to send.
102+
# When this occurs, drop the value.
103+
# Ideally the body should be truncated, however we cannot truncate as the obfuscation
104+
# logic for the body lives in the agent and truncating would make the body undecodable.
105+
if len(ser_body) <= _limits.MAX_SPAN_META_VALUE_LEN:
106+
span.set_tag_str(metadata.BODY, ser_body)
107+
else:
108+
span.set_tag_str(
109+
metadata.BODY,
110+
"<body size %s exceeds limit of %s>" % (len(ser_body), _limits.MAX_SPAN_META_VALUE_LEN),
111+
)
99112
status = None
100113

101114
# set analytics sample rate
Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
---
2+
fixes:
3+
- |
4+
elasticsearch: Omit large ``elasticsearch.body`` tag values that are
5+
greater than 25000 characters to prevent traces from being too large to
6+
send.

tests/contrib/elasticsearch/test_elasticsearch.py

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,8 @@
11
import datetime
22
from importlib import import_module
33

4+
import pytest
5+
46
from ddtrace import Pin
57
from ddtrace import config
68
from ddtrace.constants import ANALYTICS_SAMPLE_RATE_KEY
@@ -272,3 +274,24 @@ def _get_es(self):
272274

273275
def _get_index_args(self):
274276
return {"index": self.ES_INDEX, "doc_type": self.ES_TYPE}
277+
278+
@pytest.mark.skipif(
279+
(7, 0, 0) <= elasticsearch.__version__ <= (7, 1, 0), reason="test isn't compatible these elasticsearch versions"
280+
)
281+
def test_large_body(self):
282+
"""
283+
Ensure large bodies are omitted to prevent large traces from being produced.
284+
"""
285+
args = self._get_index_args()
286+
body = {
287+
"query": {"range": {"created": {"gte": "asdf" * 25000}}},
288+
}
289+
# it doesn't matter if the request fails, so long as a span is generated
290+
try:
291+
self.es.search(size=100, body=body, **args)
292+
except Exception:
293+
pass
294+
spans = self.get_spans()
295+
self.reset()
296+
assert len(spans) == 1
297+
assert len(spans[0].get_tag("elasticsearch.body")) < 25000

0 commit comments

Comments
 (0)