fix(internal) cdef Python exceptions being ignored (#3008) (#3014)

mergify[bot] · brettlangdon · web-flow · commit a7f7b68e24a9 · 2021-11-15T17:17:08.000-05:00
* fix(internal) cdef Python exceptions being ignored In a cdef function `raise` statements will create a Python exception, add it to the exception stack, and then immediately return. However, the caller will not explicitly check to see if any exception has occurred unless an exception return value has been provided. Until now these cdef functions when they encountered a `raise` would push the Python exception, and then return the default `int` value of `0`. Which based on how we use the return values `0` means success. This means that anytime we encounter an exception in our encoder we potentially ignore any exceptions and still append the partially encoded trace into the buffer. With this change we are standardizing around `except? -1` which tells the caller that their _might_ be a Python exception if the return value is `-1`. We do this because the `msgpack_*` functions we call will also return `-1` on error, but will not create a Python exception. * add release note * Update integration tests * remove unused import * set file encoding * Update releasenotes/notes/fix-encoding-exceptions-c12900b38741d2bf.yaml Co-authored-by: Tahir H. Butt <tahir.butt@datadoghq.com> Co-authored-by: Tahir H. Butt <tahir.butt@datadoghq.com> (cherry picked from commit 1b6cec4) Co-authored-by: Brett Langdon <brett.langdon@datadoghq.com>
diff --git a/ddtrace/internal/_encoding.pyx b/ddtrace/internal/_encoding.pyx
@@ -78,7 +78,7 @@ cdef inline int pack_bytes(msgpack_packer *pk, char *bs, Py_ssize_t l):
     return ret
 
 
-cdef inline int pack_number(msgpack_packer *pk, object n):
+cdef inline int pack_number(msgpack_packer *pk, object n) except? -1:
     if n is None:
         return msgpack_pack_nil(pk)
 
@@ -101,7 +101,7 @@ cdef inline int pack_number(msgpack_packer *pk, object n):
     raise TypeError("Unhandled numeric type: %r" % type(n))
 
 
-cdef inline int pack_text(msgpack_packer *pk, object text):
+cdef inline int pack_text(msgpack_packer *pk, object text) except? -1:
     cdef Py_ssize_t L
     cdef int ret
 
@@ -422,7 +422,7 @@ cdef class MsgpackEncoderBase(BufferedEncoder):
     cdef void * get_dd_origin_ref(self, str dd_origin):
         raise NotImplementedError()
 
-    cdef inline int _pack_trace(self, list trace):
+    cdef inline int _pack_trace(self, list trace) except? -1:
         cdef int ret
         cdef Py_ssize_t L
         cdef void * dd_origin = NULL
@@ -482,7 +482,7 @@ cdef class MsgpackEncoderBase(BufferedEncoder):
     cpdef flush(self):
         raise NotImplementedError()
 
-    cdef int pack_span(self, object span, void *dd_origin):
+    cdef int pack_span(self, object span, void *dd_origin) except? -1:
         raise NotImplementedError()
 
 
@@ -496,7 +496,7 @@ cdef class MsgpackEncoderV03(MsgpackEncoderBase):
     cdef void * get_dd_origin_ref(self, str dd_origin):
         return string_to_buff(dd_origin)
 
-    cdef inline int _pack_meta(self, object meta, char *dd_origin):
+    cdef inline int _pack_meta(self, object meta, char *dd_origin) except? -1:
         cdef Py_ssize_t L
         cdef int ret
         cdef dict d
@@ -524,7 +524,7 @@ cdef class MsgpackEncoderV03(MsgpackEncoderBase):
 
         raise TypeError("Unhandled meta type: %r" % type(meta))
 
-    cdef inline int _pack_metrics(self, object metrics):
+    cdef inline int _pack_metrics(self, object metrics) except? -1:
         cdef Py_ssize_t L
         cdef int ret
         cdef dict d
@@ -546,7 +546,7 @@ cdef class MsgpackEncoderV03(MsgpackEncoderBase):
 
         raise TypeError("Unhandled metrics type: %r" % type(metrics))
 
-    cdef int pack_span(self, object span, void *dd_origin):
+    cdef int pack_span(self, object span, void *dd_origin) except? -1:
         cdef int ret
         cdef Py_ssize_t L
         cdef int has_span_type
@@ -662,7 +662,7 @@ cdef class MsgpackEncoderV05(MsgpackEncoderBase):
     cdef void * get_dd_origin_ref(self, str dd_origin):
         return <void *> PyLong_AsLong(self._st._index(dd_origin))
 
-    cdef int pack_span(self, object span, void *dd_origin):
+    cdef int pack_span(self, object span, void *dd_origin) except? -1:
         cdef int ret
 
         ret = msgpack_pack_array(&self.pk, 12)
diff --git a/releasenotes/notes/fix-encoding-exceptions-c12900b38741d2bf.yaml b/releasenotes/notes/fix-encoding-exceptions-c12900b38741d2bf.yaml
@@ -0,0 +1,4 @@
+---
+fixes:
+  - |
+    Fix handling of Python exceptions during trace encoding. The tracer will no longer silently fail to encode invalid span data and instead log an exception.
diff --git a/tests/integration/test_encoding.py b/tests/integration/test_encoding.py
@@ -5,7 +5,6 @@
 import pytest
 
 from ddtrace import Tracer
-from ddtrace.internal import agent
 
 
 AGENT_VERSION = os.environ.get("AGENT_VERSION")
@@ -64,68 +63,3 @@ def test_trace_with_metrics_accepted_by_agent(self, metrics):
             tracer.shutdown()
         log.warning.assert_not_called()
         log.error.assert_not_called()
-
-
-@pytest.mark.skipif(AGENT_VERSION == "testagent", reason="Test agent doesn't return 400 response for bad trace")
-class TestTraceRejectedByAgent:
-    def _assert_bad_trace_refused_by_agent(self, mock_log):
-        """Assert that agent refused a bad trace via log call."""
-        calls = [
-            mock.call(
-                "failed to send traces to Datadog Agent at %s: HTTP error status %s, reason %s",
-                "{}/{}/traces".format(agent.get_trace_url(), "v0.4"),
-                400,
-                "Bad Request",
-            )
-        ]
-        mock_log.error.assert_has_calls(calls)
-
-    def test_wrong_span_name_type_refused_by_agent(self):
-        """Span names should be a text type."""
-        tracer = Tracer()
-        with mock.patch("ddtrace.internal.writer.log") as log:
-            with tracer.trace(123):
-                pass
-            tracer.shutdown()
-
-        self._assert_bad_trace_refused_by_agent(log)
-
-    @pytest.mark.parametrize(
-        "meta",
-        [
-            ({"env": "my-env", "tag1": "some_str_1", "tag2": "some_str_2", "tag3": [1, 2, 3]}),
-            ({"env": "test-env", b"tag1": {"wrong_type": True}, b"tag2": "some_str_2", b"tag3": "some_str_3"}),
-            ({"env": "my-test-env", u"😐": "some_str_1", b"tag2": "some_str_2", "unicode": 12345}),
-        ],
-    )
-    def test_trace_with_wrong_meta_types_refused_by_agent(self, meta):
-        tracer = Tracer()
-        with mock.patch("ddtrace.internal.writer.log") as log:
-            with tracer.trace("root") as root:
-                root.meta = meta
-                for _ in range(499):
-                    with tracer.trace("child") as child:
-                        child.meta = meta
-            tracer.shutdown()
-
-        self._assert_bad_trace_refused_by_agent(log)
-
-    @pytest.mark.parametrize(
-        "metrics",
-        [
-            ({"num1": 12345, "num2": 53421, "num3": 1, "num4": "not-a-number"}),
-            ({b"num1": 123.45, b"num2": [1, 2, 3], b"num3": 11.0, b"num4": 1.20}),
-            ({u"😐": "123.45", b"num2": "1", "num3": {"is_number": False}, "num4": "12345"}),
-        ],
-    )
-    def test_trace_with_wrong_metrics_types_refused_by_agent(self, metrics):
-        tracer = Tracer()
-        with mock.patch("ddtrace.internal.writer.log") as log:
-            with tracer.trace("root") as root:
-                root.metrics = metrics
-                for _ in range(499):
-                    with tracer.trace("child") as child:
-                        child.metrics = metrics
-            tracer.shutdown()
-
-        self._assert_bad_trace_refused_by_agent(log)
diff --git a/tests/integration/test_integration_snapshots.py b/tests/integration/test_integration_snapshots.py
@@ -1,5 +1,7 @@
+# -*- coding: utf-8 -*-
 import multiprocessing
 
+import mock
 import pytest
 
 from ddtrace import Tracer
@@ -221,3 +223,53 @@ def task2(tracer):
         p.start()
         p.join()
     tracer.shutdown()
+
+
+@snapshot()
+def test_wrong_span_name_type_not_sent():
+    """Span names should be a text type."""
+    tracer = Tracer()
+    with mock.patch("ddtrace.span.log") as log:
+        with tracer.trace(123):
+            pass
+        log.exception.assert_called_once_with("error closing trace")
+
+
+@pytest.mark.parametrize(
+    "meta",
+    [
+        ({"env": "my-env", "tag1": "some_str_1", "tag2": "some_str_2", "tag3": [1, 2, 3]}),
+        ({"env": "test-env", b"tag1": {"wrong_type": True}, b"tag2": "some_str_2", b"tag3": "some_str_3"}),
+        ({"env": "my-test-env", u"😐": "some_str_1", b"tag2": "some_str_2", "unicode": 12345}),
+    ],
+)
+@snapshot()
+def test_trace_with_wrong_meta_types_not_sent(meta):
+    tracer = Tracer()
+    with mock.patch("ddtrace.span.log") as log:
+        with tracer.trace("root") as root:
+            root.meta = meta
+            for _ in range(499):
+                with tracer.trace("child") as child:
+                    child.meta = meta
+        log.exception.assert_called_once_with("error closing trace")
+
+
+@pytest.mark.parametrize(
+    "metrics",
+    [
+        ({"num1": 12345, "num2": 53421, "num3": 1, "num4": "not-a-number"}),
+        ({b"num1": 123.45, b"num2": [1, 2, 3], b"num3": 11.0, b"num4": 1.20}),
+        ({u"😐": "123.45", b"num2": "1", "num3": {"is_number": False}, "num4": "12345"}),
+    ],
+)
+@snapshot()
+def test_trace_with_wrong_metrics_types_not_sent(metrics):
+    tracer = Tracer()
+    with mock.patch("ddtrace.span.log") as log:
+        with tracer.trace("root") as root:
+            root.metrics = metrics
+            for _ in range(499):
+                with tracer.trace("child") as child:
+                    child.metrics = metrics
+        log.exception.assert_called_once_with("error closing trace")
diff --git a/tests/snapshots/tests.integration.test_integration_snapshots.test_trace_with_wrong_meta_types_not_sent.json b/tests/snapshots/tests.integration.test_integration_snapshots.test_trace_with_wrong_meta_types_not_sent.json
@@ -0,0 +1 @@
+[]
diff --git a/tests/snapshots/tests.integration.test_integration_snapshots.test_trace_with_wrong_metrics_types_not_sent.json b/tests/snapshots/tests.integration.test_integration_snapshots.test_trace_with_wrong_metrics_types_not_sent.json
@@ -0,0 +1 @@
+[]
diff --git a/tests/snapshots/tests.integration.test_integration_snapshots.test_wrong_span_name_type_not_sent.json b/tests/snapshots/tests.integration.test_integration_snapshots.test_wrong_span_name_type_not_sent.json
@@ -0,0 +1 @@
+[]
diff --git a/tests/tracer/test_encoders.py b/tests/tracer/test_encoders.py
@@ -565,3 +565,33 @@ def test_list_string_table():
     string_table_test(t)
 
     assert list(t) == ["", "foobar", "foobaz"]
+
+
+@pytest.mark.parametrize(
+    "data",
+    [
+        {"trace_id": "trace_id"},
+        {"span_id": "span_id"},
+        {"parent_id": "parent_id"},
+        {"service": True},
+        {"resource": 50},
+        {"name": [1, 2, 3]},
+        {"start_ns": "start_time"},
+        {"duration_ns": "duration_time"},
+        {"span_type": 100},
+        {"meta": {"num": 100}},
+        {"metrics": {"key": "value"}},
+    ],
+)
+def test_encoding_invalid_data(data):
+    encoder = MsgpackEncoderV03(1 << 20, 1 << 20)
+
+    span = Span(tracer=None, name="test")
+    for key, value in data.items():
+        setattr(span, key, value)
+
+    trace = [span]
+    with pytest.raises(TypeError):
+        encoder.put(trace)
+
+    assert encoder.encode() is None

-Original file line number
+Diff line change
@@ @@ -0,0 +1,4 @@ @@
 +---
 +fixes:
 +  - |
 +    Fix handling of Python exceptions during trace encoding. The tracer will no longer silently fail to encode invalid span data and instead log an exception.