From 619d0416ec87774c5ce365429c13e53e18b82a44 Mon Sep 17 00:00:00 2001 From: brettlangdon Date: Wed, 22 Oct 2025 08:35:07 -0400 Subject: [PATCH 1/2] chore(tracing): move meta/metrics type checking to the encoder The benefit here is we can do a first pass on meta/metrics to filter out bad values before we try to pack them, this way we can throw out individual bad values instead of throwing out the whole span/trace. --- ddtrace/internal/_encoding.pyx | 138 ++++++++++++------ .../integration/test_integration_snapshots.py | 31 ++-- 2 files changed, 110 insertions(+), 59 deletions(-) diff --git a/ddtrace/internal/_encoding.pyx b/ddtrace/internal/_encoding.pyx index 6b11aaf94d1..932bb42a86c 100644 --- a/ddtrace/internal/_encoding.pyx +++ b/ddtrace/internal/_encoding.pyx @@ -22,6 +22,7 @@ from ..constants import _ORIGIN_KEY as ORIGIN_KEY from .constants import SPAN_LINKS_KEY from .constants import SPAN_EVENTS_KEY from .constants import MAX_UINT_64BITS +from .logger import get_logger from .._trace._limits import MAX_SPAN_META_VALUE_LEN from .._trace._limits import TRUNCATED_SPAN_ATTRIBUTE_LEN from ..settings._agent import config as agent_config @@ -30,6 +31,8 @@ from ..settings._agent import config as agent_config DEF MSGPACK_ARRAY_LENGTH_PREFIX_SIZE = 5 DEF MSGPACK_STRING_TABLE_LENGTH_PREFIX_SIZE = 6 +cdef object log = get_logger(__name__) + cdef extern from "Python.h": const char* PyUnicode_AsUTF8(object o) @@ -703,59 +706,80 @@ cdef class MsgpackEncoderV04(MsgpackEncoderBase): cdef Py_ssize_t L cdef int ret cdef dict d + cdef list m - if PyDict_CheckExact(meta): - d = meta - L = len(d) + (dd_origin is not NULL) + (len(span_events) > 0) - if L > ITEM_LIMIT: - raise ValueError("dict is too large") + if not PyDict_CheckExact(meta): + raise TypeError("Unhandled meta type: %r" % type(meta)) - ret = msgpack_pack_map(&self.pk, L) - if ret == 0: - for k, v in d.items(): - ret = pack_text(&self.pk, k) - if ret != 0: - break - ret = pack_text(&self.pk, v) - if ret != 0: - break - if dd_origin is not NULL: - ret = pack_bytes(&self.pk, _ORIGIN_KEY, _ORIGIN_KEY_LEN) - if ret == 0: - ret = pack_bytes(&self.pk, dd_origin, strlen(dd_origin)) - if ret != 0: - return ret - if span_events: - ret = pack_text(&self.pk, SPAN_EVENTS_KEY) - if ret == 0: - ret = pack_text(&self.pk, span_events) - return ret + d = meta - raise TypeError("Unhandled meta type: %r" % type(meta)) + # Filter meta to only str/bytes values + m = [] + for k, v in d.items(): + if PyUnicode_Check(v) or PyBytesLike_Check(v): + m.append((k, v)) + else: + log.warning("Meta key %r has non-string value %r, skipping", k, v) + + L = len(m) + (dd_origin is not NULL) + (len(span_events) > 0) + if L > ITEM_LIMIT: + raise ValueError("dict is too large") + + ret = msgpack_pack_map(&self.pk, L) + if ret == 0: + for k, v in m: + ret = pack_text(&self.pk, k) + if ret != 0: + break + ret = pack_text(&self.pk, v) + if ret != 0: + break + if dd_origin is not NULL: + ret = pack_bytes(&self.pk, _ORIGIN_KEY, _ORIGIN_KEY_LEN) + if ret == 0: + ret = pack_bytes(&self.pk, dd_origin, strlen(dd_origin)) + if ret != 0: + return ret + if span_events: + ret = pack_text(&self.pk, SPAN_EVENTS_KEY) + if ret == 0: + ret = pack_text(&self.pk, span_events) + return ret cdef inline int _pack_metrics(self, object metrics) except? -1: cdef Py_ssize_t L cdef int ret cdef dict d + cdef list m - if PyDict_CheckExact(metrics): - d = metrics - L = len(d) - if L > ITEM_LIMIT: - raise ValueError("dict is too large") + if not PyDict_CheckExact(metrics): + raise TypeError("Unhandled metrics type: %r" % type(metrics)) - ret = msgpack_pack_map(&self.pk, L) - if ret == 0: - for k, v in d.items(): - ret = pack_text(&self.pk, k) - if ret != 0: - break - ret = pack_number(&self.pk, v) - if ret != 0: - break - return ret - raise TypeError("Unhandled metrics type: %r" % type(metrics)) + d = metrics + m = [] + + # Filter metrics to only number values + for k, v in d.items(): + if PyLong_Check(v) or PyFloat_Check(v): + m.append((k, v)) + else: + log.warning("Metric key %r has non-numeric value %r, skipping", k, v) + + L = len(m) + if L > ITEM_LIMIT: + raise ValueError("dict is too large") + + ret = msgpack_pack_map(&self.pk, L) + if ret == 0: + for k, v in m: + ret = pack_text(&self.pk, k) + if ret != 0: + break + ret = pack_number(&self.pk, v) + if ret != 0: + break + return ret cdef int pack_span(self, object span, unsigned long long trace_id_64bits, void *dd_origin) except? -1: cdef int ret @@ -1035,6 +1059,7 @@ cdef class MsgpackEncoderV05(MsgpackEncoderBase): cdef int pack_span(self, object span, unsigned long long trace_id_64bits, void *dd_origin) except? -1: cdef int ret + cdef list meta, metrics ret = msgpack_pack_array(&self.pk, 12) if ret != 0: @@ -1089,14 +1114,22 @@ cdef class MsgpackEncoderV05(MsgpackEncoderBase): if span._events: span_events = json_dumps([vars(event)() for event in span._events]) + # Filter meta to only str/bytes values + meta = [] + for k, v in span._meta.items(): + if PyUnicode_Check(v) or PyBytesLike_Check(v): + meta.append((k, v)) + else: + log.warning("Meta key %r has non-string value %r, skipping", k, v) + ret = msgpack_pack_map( &self.pk, - len(span._meta) + (dd_origin is not NULL) + (len(span_links) > 0) + (len(span_events) > 0) + len(meta) + (dd_origin is not NULL) + (len(span_links) > 0) + (len(span_events) > 0) ) if ret != 0: return ret - if span._meta: - for k, v in span._meta.items(): + if meta: + for k, v in meta: ret = self._pack_string(k) if ret != 0: return ret @@ -1125,11 +1158,20 @@ cdef class MsgpackEncoderV05(MsgpackEncoderBase): if ret != 0: return ret - ret = msgpack_pack_map(&self.pk, len(span._metrics)) + + # Filter metrics to only number values + metrics = [] + for k, v in span._metrics.items(): + if PyLong_Check(v) or PyFloat_Check(v): + metrics.append((k, v)) + else: + log.warning("Metric key %r has non-numeric value %r, skipping", k, v) + + ret = msgpack_pack_map(&self.pk, len(metrics)) if ret != 0: return ret - if span._metrics: - for k, v in span._metrics.items(): + if metrics: + for k, v in metrics: ret = self._pack_string(k) if ret != 0: return ret diff --git a/tests/integration/test_integration_snapshots.py b/tests/integration/test_integration_snapshots.py index 8bb70cf70a6..2560f299a8e 100644 --- a/tests/integration/test_integration_snapshots.py +++ b/tests/integration/test_integration_snapshots.py @@ -1,4 +1,5 @@ # -*- coding: utf-8 -*- +import logging import os import mock @@ -216,42 +217,50 @@ def test_wrong_span_name_type_not_sent(): ({"env": "my-env", "tag1": "some_str_1", "tag2": "some_str_2", "tag3": [1, 2, 3]}), ({"env": "test-env", b"tag1": {"wrong_type": True}, b"tag2": "some_str_2", b"tag3": "some_str_3"}), ({"env": "my-test-env", "😐": "some_str_1", b"tag2": "some_str_2", "unicode": 12345}), + ({"env": set([1, 2, 3])}), + ({"env": None}), + ({"env": True}), + ({"env": 1.0}), ], ) @pytest.mark.parametrize("encoding", ["v0.4", "v0.5"]) def test_trace_with_wrong_meta_types_not_sent(encoding, meta, monkeypatch): """Wrong meta types should raise TypeErrors during encoding and fail to send to the agent.""" with override_global_config(dict(_trace_api=encoding)): - with mock.patch("ddtrace._trace.span.log") as log: + logger = logging.getLogger("ddtrace.internal._encoding") + with mock.patch.object(logger, "warning") as log_warning: with tracer.trace("root") as root: root._meta = meta for _ in range(299): with tracer.trace("child") as child: child._meta = meta - log.exception.assert_called_once_with("error closing trace") + + assert log_warning.call_count == 300 + log_warning.assert_called_with("Meta key %r has non-string value %r, skipping", mock.ANY, mock.ANY) @pytest.mark.parametrize( - "metrics", + "metrics,expected_warning_count", [ - ({"num1": 12345, "num2": 53421, "num3": 1, "num4": "not-a-number"}), - ({b"num1": 123.45, b"num2": [1, 2, 3], b"num3": 11.0, b"num4": 1.20}), - ({"😐": "123.45", b"num2": "1", "num3": {"is_number": False}, "num4": "12345"}), + ({"num1": 12345, "num2": 53421, "num3": 1, "num4": "not-a-number"}, 300), + ({b"num1": 123.45, b"num2": [1, 2, 3], b"num3": 11.0, b"num4": 1.20}, 300), + ({"😐": "123.45", b"num2": "1", "num3": {"is_number": False}, "num4": "12345"}, 1200), ], ) @pytest.mark.parametrize("encoding", ["v0.4", "v0.5"]) -@snapshot() -@pytest.mark.xfail -def test_trace_with_wrong_metrics_types_not_sent(encoding, metrics, monkeypatch): +def test_trace_with_wrong_metrics_types_not_sent(encoding, metrics, expected_warning_count): """Wrong metric types should raise TypeErrors during encoding and fail to send to the agent.""" with override_global_config(dict(_trace_api=encoding)): - with mock.patch("ddtrace._trace.span.log") as log: + logger = logging.getLogger("ddtrace.internal._encoding") + with mock.patch.object(logger, "warning") as log_warning: with tracer.trace("root") as root: root._metrics = metrics for _ in range(299): with tracer.trace("child") as child: child._metrics = metrics - log.exception.assert_called_once_with("error closing trace") + + assert log_warning.call_count == expected_warning_count + log_warning.assert_called_with("Metric key %r has non-numeric value %r, skipping", mock.ANY, mock.ANY) @pytest.mark.subprocess() From 06576107b7511da4ca1d2b176e99ef6583ed34b7 Mon Sep 17 00:00:00 2001 From: brettlangdon Date: Wed, 22 Oct 2025 11:52:31 -0400 Subject: [PATCH 2/2] fix linting --- ddtrace/internal/_encoding.pyx | 2 -- 1 file changed, 2 deletions(-) diff --git a/ddtrace/internal/_encoding.pyx b/ddtrace/internal/_encoding.pyx index 932bb42a86c..4d89e0c27cd 100644 --- a/ddtrace/internal/_encoding.pyx +++ b/ddtrace/internal/_encoding.pyx @@ -755,7 +755,6 @@ cdef class MsgpackEncoderV04(MsgpackEncoderBase): if not PyDict_CheckExact(metrics): raise TypeError("Unhandled metrics type: %r" % type(metrics)) - d = metrics m = [] @@ -1158,7 +1157,6 @@ cdef class MsgpackEncoderV05(MsgpackEncoderBase): if ret != 0: return ret - # Filter metrics to only number values metrics = [] for k, v in span._metrics.items():