Skip to content

Commit 5a51e2d

Browse files
wantsuiquinna-hmabdinuremmettbutler
authored
fix(tracing): truncate long span attributes (#13270) [backport 2.21] (#13811)
2.21 backport of: #13270 Truncate span resource name, tag key and tag values. Previously, a very large resource name would result in a runtime error during encoding. If any of these have over 25000 chars, this will truncate them to up to 2500 chars (and include the suffix `<truncated>`) The agent will truncate based on the limits [here](https://docs.datadoghq.com/tracing/troubleshooting/?tab=java#data-volume-guidelines) Resolves: - #13221 - #6587 - [x] PR author has checked that all the criteria below are met - The PR description includes an overview of the change - The PR description articulates the motivation for the change - The change includes tests OR the PR description describes a testing strategy - The PR description notes risks associated with the change, if any - Newly-added code is easy to change - The change follows the [library release note guidelines](https://ddtrace.readthedocs.io/en/stable/releasenotes.html) - The change includes or references documentation updates if necessary - Backport labels are set (if [applicable](https://ddtrace.readthedocs.io/en/latest/contributing.html#backporting)) - [x] Reviewer has checked that all the criteria below are met - Title is accurate - All changes are related to the pull request's stated goal - Avoids breaking [API](https://ddtrace.readthedocs.io/en/stable/versioning.html#interfaces) changes - Testing strategy adequately addresses listed risks - Newly-added code is easy to change - Release note makes sense to a user of the library - If necessary, author has acknowledged and discussed the performance implications of this PR as reported in the benchmarks PR comment - Backport labels are set in a manner that is consistent with the [release branch maintenance policy](https://ddtrace.readthedocs.io/en/latest/contributing.html#backporting) --------- (cherry picked from commit 5aa32d1) ## Checklist - [x] PR author has checked that all the criteria below are met - The PR description includes an overview of the change - The PR description articulates the motivation for the change - The change includes tests OR the PR description describes a testing strategy - The PR description notes risks associated with the change, if any - Newly-added code is easy to change - The change follows the [library release note guidelines](https://ddtrace.readthedocs.io/en/stable/releasenotes.html) - The change includes or references documentation updates if necessary - Backport labels are set (if [applicable](https://ddtrace.readthedocs.io/en/latest/contributing.html#backporting)) ## Reviewer Checklist - [x] Reviewer has checked that all the criteria below are met - Title is accurate - All changes are related to the pull request's stated goal - Avoids breaking [API](https://ddtrace.readthedocs.io/en/stable/versioning.html#interfaces) changes - Testing strategy adequately addresses listed risks - Newly-added code is easy to change - Release note makes sense to a user of the library - If necessary, author has acknowledged and discussed the performance implications of this PR as reported in the benchmarks PR comment - Backport labels are set in a manner that is consistent with the [release branch maintenance policy](https://ddtrace.readthedocs.io/en/latest/contributing.html#backporting) Co-authored-by: Quinna Halim <[email protected]> Co-authored-by: Munir Abdinur <[email protected]> Co-authored-by: Emmett Butler <[email protected]>
1 parent 29e331b commit 5a51e2d

7 files changed

+87
-38
lines changed

ddtrace/_trace/_limits.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,3 +4,4 @@
44

55
MAX_SPAN_META_KEY_LEN = 200
66
MAX_SPAN_META_VALUE_LEN = 25000
7+
TRUNCATED_SPAN_ATTRIBUTE_LEN = 2500

ddtrace/internal/_encoding.pyx

Lines changed: 19 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,8 @@ from ..constants import _ORIGIN_KEY as ORIGIN_KEY
2323
from .constants import SPAN_LINKS_KEY
2424
from .constants import SPAN_EVENTS_KEY
2525
from .constants import MAX_UINT_64BITS
26+
from .._trace._limits import MAX_SPAN_META_VALUE_LEN
27+
from .._trace._limits import TRUNCATED_SPAN_ATTRIBUTE_LEN
2628

2729

2830
DEF MSGPACK_ARRAY_LENGTH_PREFIX_SIZE = 5
@@ -92,6 +94,10 @@ cdef inline int array_prefix_size(stdint.uint32_t l):
9294
return 3
9395
return MSGPACK_ARRAY_LENGTH_PREFIX_SIZE
9496

97+
cdef inline object truncate_string(object string):
98+
if string and len(string) > MAX_SPAN_META_VALUE_LEN:
99+
return string[:TRUNCATED_SPAN_ATTRIBUTE_LEN - 14] + "<truncated>..."
100+
return string
95101

96102
cdef inline int pack_bytes(msgpack_packer *pk, char *bs, Py_ssize_t l):
97103
cdef int ret
@@ -129,31 +135,35 @@ cdef inline int pack_text(msgpack_packer *pk, object text) except? -1:
129135

130136
if PyBytesLike_Check(text):
131137
L = len(text)
132-
if L > ITEM_LIMIT:
138+
if L > MAX_SPAN_META_VALUE_LEN:
133139
PyErr_Format(ValueError, b"%.200s object is too large", Py_TYPE(text).tp_name)
140+
text = truncate_string(text)
141+
L = len(text)
134142
ret = msgpack_pack_raw(pk, L)
135143
if ret == 0:
136144
ret = msgpack_pack_raw_body(pk, <char *> text, L)
137145
return ret
138146

139147
if PyUnicode_Check(text):
148+
if len(text) > MAX_SPAN_META_VALUE_LEN:
149+
text = truncate_string(text)
140150
IF PY_MAJOR_VERSION >= 3:
141-
ret = msgpack_pack_unicode(pk, text, ITEM_LIMIT)
151+
ret = msgpack_pack_unicode(pk, text, MAX_SPAN_META_VALUE_LEN)
142152
if ret == -2:
143153
raise ValueError("unicode string is too large")
144154
ELSE:
145155
text = PyUnicode_AsEncodedString(text, "utf-8", NULL)
146156
L = len(text)
147-
if L > ITEM_LIMIT:
157+
if L > MAX_SPAN_META_VALUE_LEN:
148158
raise ValueError("unicode string is too large")
149159
ret = msgpack_pack_raw(pk, L)
150160
if ret == 0:
151161
ret = msgpack_pack_raw_body(pk, <char *> text, L)
162+
152163
return ret
153164

154165
raise TypeError("Unhandled text type: %r" % type(text))
155166

156-
157167
cdef class StringTable(object):
158168
cdef dict _table
159169
cdef stdint.uint32_t _next_id
@@ -220,7 +230,6 @@ cdef class ListStringTable(StringTable):
220230
cdef class MsgpackStringTable(StringTable):
221231
cdef msgpack_packer pk
222232
cdef int max_size
223-
cdef int _max_string_length
224233
cdef int _sp_len
225234
cdef stdint.uint32_t _sp_id
226235
cdef object _lock
@@ -232,7 +241,6 @@ cdef class MsgpackStringTable(StringTable):
232241
if self.pk.buf == NULL:
233242
raise MemoryError("Unable to allocate internal buffer.")
234243
self.max_size = max_size
235-
self._max_string_length = int(0.1*max_size)
236244
self.pk.length = MSGPACK_STRING_TABLE_LENGTH_PREFIX_SIZE
237245
self._sp_len = 0
238246
self._lock = threading.RLock()
@@ -248,15 +256,13 @@ cdef class MsgpackStringTable(StringTable):
248256
cdef insert(self, object string):
249257
cdef int ret
250258

251-
if len(string) > self._max_string_length:
252-
string = "<dropped string of length %d because it's too long (max allowed length %d)>" % (
253-
len(string), self._max_string_length
254-
)
259+
# Before inserting, truncate the string if it is greater than MAX_SPAN_META_VALUE_LEN
260+
string = truncate_string(string)
255261

256262
if self.pk.length + len(string) > self.max_size:
257263
raise ValueError(
258-
"Cannot insert '%s': string table is full (current size: %d, max size: %d)." % (
259-
string, self.pk.length, self.max_size
264+
"Cannot insert '%s': string table is full (current size: %d, size after insert: %d, max size: %d)." % (
265+
string, self.pk.length, (self.pk.length + len(string)), self.max_size
260266
)
261267
)
262268

@@ -846,6 +852,7 @@ cdef class MsgpackEncoderV05(MsgpackEncoderBase):
846852
raise
847853

848854
cdef inline int _pack_string(self, object string) except? -1:
855+
string = truncate_string(string)
849856
return msgpack_pack_uint32(&self.pk, self._st._index(string))
850857

851858
cdef void * get_dd_origin_ref(self, str dd_origin):
Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
---
2+
fixes:
3+
- |
4+
tracing: Fixes an issue where span attributes were not truncated before encoding, leading to runtime error and causing spans to be dropped.
5+
Spans with resource name, tag key or value larger than 25000 characters will be truncated to 2500 characters.

tests/integration/test_integration.py

Lines changed: 0 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -175,32 +175,6 @@ def test_payload_too_large():
175175
log.error.assert_not_called()
176176

177177

178-
@skip_if_testagent
179-
@pytest.mark.subprocess(
180-
env=dict(
181-
DD_TRACE_API_VERSION="v0.5",
182-
DD_TRACE_WRITER_BUFFER_SIZE_BYTES=str(FOUR_KB),
183-
)
184-
)
185-
def test_resource_name_too_large():
186-
import pytest
187-
188-
from ddtrace.trace import tracer as t
189-
from tests.integration.test_integration import FOUR_KB
190-
191-
assert t._writer._buffer_size == FOUR_KB
192-
s = t.trace("operation", service="foo")
193-
# Maximum string length is set to 10% of the maximum buffer size
194-
s.resource = "B" * int(0.1 * FOUR_KB + 1)
195-
try:
196-
s.finish()
197-
except ValueError:
198-
pytest.fail()
199-
encoded_spans, size = t._writer._encoder.encode()
200-
assert size == 1
201-
assert b"<dropped string of length 410 because it's too long (max allowed length 409)>" in encoded_spans
202-
203-
204178
@parametrize_with_all_encodings
205179
def test_large_payload_is_sent_without_warning_logs():
206180
import mock

tests/integration/test_integration_snapshots.py

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -286,3 +286,13 @@ def test_setting_span_tags_and_metrics_generates_no_error_logs():
286286
s.set_metric("number2", 12.0)
287287
s.set_metric("number3", "1")
288288
s.finish()
289+
290+
291+
@pytest.mark.parametrize("encoding", ["v0.4", "v0.5"])
292+
@pytest.mark.snapshot()
293+
def test_encode_span_with_large_string_attributes(encoding):
294+
from ddtrace import tracer
295+
296+
with override_global_config(dict(_trace_api=encoding)):
297+
with tracer.trace(name="a" * 25000, resource="b" * 25001) as span:
298+
span.set_tag(key="c" * 25001, value="d" * 2000)

tests/snapshots/tests.integration.test_integration_snapshots.test_encode_span_with_large_string_attributes[v0.4].json

Lines changed: 26 additions & 0 deletions
Large diffs are not rendered by default.

tests/snapshots/tests.integration.test_integration_snapshots.test_encode_span_with_large_string_attributes[v0.5].json

Lines changed: 26 additions & 0 deletions
Large diffs are not rendered by default.

0 commit comments

Comments
 (0)