fix(tracing): truncate long span attributes (#13270) [backport 2.21] (#13811)

wantsui · quinna-h · mabdinur · web-flow · commit 5a51e2ddb6bc · 2025-07-21T19:46:49.000Z
2.21 backport of #13270: truncate span resource names, tag keys and tag values. Previously, a very large resource name would result in a runtime error during encoding. If any of these is longer than 25000 chars, it is now truncated to 2500 chars (including the suffix `<truncated>...`). The agent will truncate based on the limits [here](https://docs.datadoghq.com/tracing/troubleshooting/?tab=java#data-volume-guidelines). Resolves: - #13221 - #6587 - [x] PR author has checked that all the criteria below are met - The PR description includes an overview of the change - The PR description articulates the motivation for the change - The change includes tests OR the PR description describes a testing strategy - The PR description notes risks associated with the change, if any - Newly-added code is easy to change - The change follows the [library release note guidelines](https://ddtrace.readthedocs.io/en/stable/releasenotes.html) - The change includes or references documentation updates if necessary - Backport labels are set (if [applicable](https://ddtrace.readthedocs.io/en/latest/contributing.html#backporting)) - [x] Reviewer has checked that all the criteria below are met - Title is accurate - All changes are related to the pull request's stated goal - Avoids breaking [API](https://ddtrace.readthedocs.io/en/stable/versioning.html#interfaces) changes - Testing strategy adequately addresses listed risks - Newly-added code is easy to change - Release note makes sense to a user of the library - If necessary, author has acknowledged and discussed the performance implications of this PR as reported in the benchmarks PR comment - Backport labels are set in a manner that is consistent with the [release branch maintenance policy](https://ddtrace.readthedocs.io/en/latest/contributing.html#backporting) --------- (cherry picked from commit 5aa32d1) ## Checklist - [x] PR author has checked that all the criteria below are met - The PR description includes an overview of the change - 
The PR description articulates the motivation for the change - The change includes tests OR the PR description describes a testing strategy - The PR description notes risks associated with the change, if any - Newly-added code is easy to change - The change follows the [library release note guidelines](https://ddtrace.readthedocs.io/en/stable/releasenotes.html) - The change includes or references documentation updates if necessary - Backport labels are set (if [applicable](https://ddtrace.readthedocs.io/en/latest/contributing.html#backporting)) ## Reviewer Checklist - [x] Reviewer has checked that all the criteria below are met - Title is accurate - All changes are related to the pull request's stated goal - Avoids breaking [API](https://ddtrace.readthedocs.io/en/stable/versioning.html#interfaces) changes - Testing strategy adequately addresses listed risks - Newly-added code is easy to change - Release note makes sense to a user of the library - If necessary, author has acknowledged and discussed the performance implications of this PR as reported in the benchmarks PR comment - Backport labels are set in a manner that is consistent with the [release branch maintenance policy](https://ddtrace.readthedocs.io/en/latest/contributing.html#backporting) Co-authored-by: Quinna Halim <quinna.halim@datadoghq.com> Co-authored-by: Munir Abdinur <munir_abdinur@hotmail.com> Co-authored-by: Emmett Butler <723615+emmettbutler@users.noreply.github.com>
diff --git a/ddtrace/_trace/_limits.py b/ddtrace/_trace/_limits.py
@@ -4,3 +4,4 @@
 
 MAX_SPAN_META_KEY_LEN = 200
 MAX_SPAN_META_VALUE_LEN = 25000
+# Total length, including the "<truncated>..." marker, that a span attribute
+# longer than MAX_SPAN_META_VALUE_LEN is cut down to by the encoder.
+TRUNCATED_SPAN_ATTRIBUTE_LEN = 2500
diff --git a/ddtrace/internal/_encoding.pyx b/ddtrace/internal/_encoding.pyx
@@ -23,6 +23,8 @@ from ..constants import _ORIGIN_KEY as ORIGIN_KEY
 from .constants import SPAN_LINKS_KEY
 from .constants import SPAN_EVENTS_KEY
 from .constants import MAX_UINT_64BITS
+from .._trace._limits import MAX_SPAN_META_VALUE_LEN
+from .._trace._limits import TRUNCATED_SPAN_ATTRIBUTE_LEN
 
 
 DEF MSGPACK_ARRAY_LENGTH_PREFIX_SIZE = 5
@@ -92,6 +94,10 @@ cdef inline int array_prefix_size(stdint.uint32_t l):
         return 3
     return MSGPACK_ARRAY_LENGTH_PREFIX_SIZE
 
+cdef inline object truncate_string(object string):
+    # Shorten an over-long span attribute before it is encoded.
+    #
+    # Values longer than MAX_SPAN_META_VALUE_LEN are cut so that the result,
+    # including the "<truncated>..." marker, is TRUNCATED_SPAN_ATTRIBUTE_LEN
+    # items long. Empty/None values and values within the limit are returned
+    # unchanged.
+    if not string or len(string) <= MAX_SPAN_META_VALUE_LEN:
+        return string
+    # pack_text() also passes bytes-like values here; the marker has to match
+    # the input type because concatenating text onto bytes/bytearray raises
+    # TypeError.
+    if isinstance(string, (bytes, bytearray)):
+        marker = b"<truncated>..."
+    else:
+        marker = "<truncated>..."
+    return string[:TRUNCATED_SPAN_ATTRIBUTE_LEN - len(marker)] + marker
 
 cdef inline int pack_bytes(msgpack_packer *pk, char *bs, Py_ssize_t l):
     cdef int ret
@@ -129,31 +135,35 @@ cdef inline int pack_text(msgpack_packer *pk, object text) except? -1:
 
     if PyBytesLike_Check(text):
         L = len(text)
-        if L > ITEM_LIMIT:
+        if L > MAX_SPAN_META_VALUE_LEN:
             PyErr_Format(ValueError, b"%.200s object is too large", Py_TYPE(text).tp_name)
+            text = truncate_string(text)
+            L = len(text)
         ret = msgpack_pack_raw(pk, L)
         if ret == 0:
             ret = msgpack_pack_raw_body(pk, <char *> text, L)
         return ret
 
     if PyUnicode_Check(text):
+        if len(text) > MAX_SPAN_META_VALUE_LEN:
+            text = truncate_string(text)
         IF PY_MAJOR_VERSION >= 3:
-            ret = msgpack_pack_unicode(pk, text, ITEM_LIMIT)
+            ret = msgpack_pack_unicode(pk, text, MAX_SPAN_META_VALUE_LEN)
             if ret == -2:
                 raise ValueError("unicode string is too large")
         ELSE:
             text = PyUnicode_AsEncodedString(text, "utf-8", NULL)
             L = len(text)
-            if L > ITEM_LIMIT:
+            if L > MAX_SPAN_META_VALUE_LEN:
                 raise ValueError("unicode string is too large")
             ret = msgpack_pack_raw(pk, L)
             if ret == 0:
                 ret = msgpack_pack_raw_body(pk, <char *> text, L)
+
         return ret
 
     raise TypeError("Unhandled text type: %r" % type(text))
 
-
 cdef class StringTable(object):
     cdef dict _table
     cdef stdint.uint32_t _next_id
@@ -220,7 +230,6 @@ cdef class ListStringTable(StringTable):
 cdef class MsgpackStringTable(StringTable):
     cdef msgpack_packer pk
     cdef int max_size
-    cdef int _max_string_length
     cdef int _sp_len
     cdef stdint.uint32_t _sp_id
     cdef object _lock
@@ -232,7 +241,6 @@ cdef class MsgpackStringTable(StringTable):
         if self.pk.buf == NULL:
             raise MemoryError("Unable to allocate internal buffer.")
         self.max_size = max_size
-        self._max_string_length = int(0.1*max_size)
         self.pk.length = MSGPACK_STRING_TABLE_LENGTH_PREFIX_SIZE
         self._sp_len = 0
         self._lock = threading.RLock()
@@ -248,15 +256,13 @@ cdef class MsgpackStringTable(StringTable):
     cdef insert(self, object string):
         cdef int ret
 
-        if len(string) > self._max_string_length:
-            string = "<dropped string of length %d because it's too long (max allowed length %d)>" % (
-                len(string), self._max_string_length
-            )
+        # Before inserting, truncate the string if it is greater than MAX_SPAN_META_VALUE_LEN
+        string = truncate_string(string)
 
         if self.pk.length + len(string) > self.max_size:
             raise ValueError(
-                "Cannot insert '%s': string table is full (current size: %d, max size: %d)." % (
-                    string, self.pk.length, self.max_size
+                "Cannot insert '%s': string table is full (current size: %d, size after insert: %d, max size: %d)." % (
+                    string, self.pk.length, (self.pk.length + len(string)), self.max_size
                 )
             )
 
@@ -846,6 +852,7 @@ cdef class MsgpackEncoderV05(MsgpackEncoderBase):
                 raise
 
     cdef inline int _pack_string(self, object string) except? -1:
+        # Truncate over-long strings first so that the value looked up through
+        # self._st is already the shortened form. Only the uint32 returned by
+        # self._st._index() is packed here (presumably the string-table id of
+        # the value; confirm against StringTable._index).
+        string = truncate_string(string)
         return msgpack_pack_uint32(&self.pk, self._st._index(string))
 
     cdef void * get_dd_origin_ref(self, str dd_origin):
diff --git a/releasenotes/notes/truncate-long-span-attributes-a4900ee9844160b4.yaml b/releasenotes/notes/truncate-long-span-attributes-a4900ee9844160b4.yaml
@@ -0,0 +1,5 @@
+---
+fixes:
+  - |
+    tracing: Fixes an issue where span attributes were not truncated before encoding, leading to a runtime error and causing spans to be dropped.
+    A resource name, tag key or tag value longer than 25000 characters is now truncated to 2500 characters.
diff --git a/tests/integration/test_integration.py b/tests/integration/test_integration.py
@@ -175,32 +175,6 @@ def test_payload_too_large():
         log.error.assert_not_called()
 
 
-@skip_if_testagent
-@pytest.mark.subprocess(
-    env=dict(
-        DD_TRACE_API_VERSION="v0.5",
-        DD_TRACE_WRITER_BUFFER_SIZE_BYTES=str(FOUR_KB),
-    )
-)
-def test_resource_name_too_large():
-    import pytest
-
-    from ddtrace.trace import tracer as t
-    from tests.integration.test_integration import FOUR_KB
-
-    assert t._writer._buffer_size == FOUR_KB
-    s = t.trace("operation", service="foo")
-    # Maximum string length is set to 10% of the maximum buffer size
-    s.resource = "B" * int(0.1 * FOUR_KB + 1)
-    try:
-        s.finish()
-    except ValueError:
-        pytest.fail()
-    encoded_spans, size = t._writer._encoder.encode()
-    assert size == 1
-    assert b"<dropped string of length 410 because it's too long (max allowed length 409)>" in encoded_spans
-
-
 @parametrize_with_all_encodings
 def test_large_payload_is_sent_without_warning_logs():
     import mock
diff --git a/tests/integration/test_integration_snapshots.py b/tests/integration/test_integration_snapshots.py
@@ -286,3 +286,13 @@ def test_setting_span_tags_and_metrics_generates_no_error_logs():
     s.set_metric("number2", 12.0)
     s.set_metric("number3", "1")
     s.finish()
+
+
+@pytest.mark.parametrize("encoding", ["v0.4", "v0.5"])
+@pytest.mark.snapshot()
+def test_encode_span_with_large_string_attributes(encoding):
+    """Trace a span with a very long name, resource and tag key under each API version.
+
+    The test body makes no assertions of its own; it is marked as a snapshot
+    test, with one recorded snapshot per ``encoding`` value.
+    """
+    from ddtrace import tracer
+
+    with override_global_config(dict(_trace_api=encoding)):
+        # The name is exactly 25000 chars; the resource and the tag key are
+        # 25001 chars; the tag value is a short 2000 chars.
+        # NOTE(review): 25000 appears to mirror MAX_SPAN_META_VALUE_LEN, so the
+        # name sits at the limit while resource and tag key exceed it; confirm.
+        with tracer.trace(name="a" * 25000, resource="b" * 25001) as span:
+            span.set_tag(key="c" * 25001, value="d" * 2000)
diff --git a/tests/snapshots/tests.integration.test_integration_snapshots.test_encode_span_with_large_string_attributes[v0.4].json b/tests/snapshots/tests.integration.test_integration_snapshots.test_encode_span_with_large_string_attributes[v0.4].json
diff --git a/tests/snapshots/tests.integration.test_integration_snapshots.test_encode_span_with_large_string_attributes[v0.5].json b/tests/snapshots/tests.integration.test_integration_snapshots.test_encode_span_with_large_string_attributes[v0.5].json

Original file line number	Diff line number	Diff line change
`@@ -4,3 +4,4 @@`
`4`	`4`
`5`	`5`	`MAX_SPAN_META_KEY_LEN = 200`
`6`	`6`	`MAX_SPAN_META_VALUE_LEN = 25000`
	`7`	`+TRUNCATED_SPAN_ATTRIBUTE_LEN = 2500`
-Original file line number
+Diff line change
@@ @@ -0,0 +1,5 @@ @@
 +---
 +fixes:
 +  - |
 +    tracing: Fixes an issue where span attributes were not truncated before encoding, leading to a runtime error and causing spans to be dropped.
 +    A resource name, tag key or tag value longer than 25000 characters is now truncated to 2500 characters.