perf(botocore): avoid creating interim dicts (#2589)

P403n1x87 · brettlangdon · mergify[bot] · web-flow · commit 527fa21cde5c · 2021-06-30T14:31:42.000Z
The current implementation of the AWS helper for setting tags creates
three intermediate dictionaries. The proposed change does away with
them by generating tag names and values from the available data. It also
excludes the params.MessageBody tag from the sqs endpoint, which could
contain big strings that are costly to encode and send over to the agent.

Co-authored-by: Brett Langdon <brett.langdon@datadoghq.com>
Co-authored-by: mergify[bot] <37929162+mergify[bot]@users.noreply.github.com>
diff --git a/ddtrace/contrib/botocore/patch.py b/ddtrace/contrib/botocore/patch.py
@@ -130,7 +130,10 @@ def patched_api_call(original_func, instance, args, kwargs):
         operation = None
         if args:
             operation = args[0]
-            span.resource = "%s.%s" % (endpoint_name, operation.lower())
+            # DEV: join is the fastest way of concatenating strings that is compatible
+            # across Python versions (see
+            # https://stackoverflow.com/questions/1316887/what-is-the-most-efficient-string-concatenation-method-in-python)
+            span.resource = ".".join((endpoint_name, operation.lower()))
 
             if config.botocore["distributed_tracing"]:
                 if endpoint_name == "lambda" and operation == "Invoke":
@@ -147,12 +150,11 @@ def patched_api_call(original_func, instance, args, kwargs):
 
         region_name = deep_getattr(instance, "meta.region_name")
 
-        meta = {
-            "aws.agent": "botocore",
-            "aws.operation": operation,
-            "aws.region": region_name,
-        }
-        span.set_tags(meta)
+        span._set_str_tag("aws.agent", "botocore")
+        if operation is not None:
+            span._set_str_tag("aws.operation", operation)
+        if region_name is not None:
+            span._set_str_tag("aws.region", region_name)
 
         result = original_func(*args, **kwargs)
 
diff --git a/ddtrace/contrib/trace_utils.py b/ddtrace/contrib/trace_utils.py
@@ -4,10 +4,13 @@
 from collections import deque
 import re
 from typing import Any
+from typing import Callable
 from typing import Dict
+from typing import Generator
+from typing import Iterator
 from typing import Optional
-from typing import Set
 from typing import TYPE_CHECKING
+from typing import Tuple
 
 from ddtrace import Pin
 from ddtrace import config
@@ -279,26 +282,33 @@ def activate_distributed_headers(tracer, int_config=None, request_headers=None,
             tracer.context_provider.activate(context)
 
 
-def flatten_dict(
-    d,  # type: Dict[str, Any]
+def _flatten(
+    obj,  # type: Any
     sep=".",  # type: str
     prefix="",  # type: str
-    exclude=None,  # type: Optional[Set[str]]
+    exclude_policy=None,  # type: Optional[Callable[[str], bool]]
 ):
-    # type: (...) -> Dict[str, Any]
-    """
-    Returns a normalized dict of depth 1
-    """
-    flat = {}
+    # type: (...) -> Generator[Tuple[str, Any], None, None]
     s = deque()  # type: ignore
-    s.append((prefix, d))
-    exclude = exclude or set()
+    s.append((prefix, obj))
     while s:
         p, v = s.pop()
-        if p in exclude:
+        if exclude_policy is not None and exclude_policy(p):
             continue
         if isinstance(v, dict):
-            s.extend((p + sep + k if p else k, v) for k, v in v.items())
+            s.extend((sep.join((p, k)) if p else k, v) for k, v in v.items())
         else:
-            flat[p] = v
-    return flat
+            yield p, v
+
+
+def set_flattened_tags(
+    span,  # type: Span
+    items,  # type: Iterator[Tuple[str, Any]]
+    sep=".",  # type: str
+    exclude_policy=None,  # type: Optional[Callable[[str], bool]]
+    processor=None,  # type Optional[Callable[[Any], Any]]
+):
+    # type: (...) -> None
+    for prefix, value in items:
+        for tag, v in _flatten(value, sep, prefix, exclude_policy):
+            span.set_tag(tag, processor(v) if processor is not None else v)
diff --git a/ddtrace/ext/aws.py b/ddtrace/ext/aws.py
@@ -1,19 +1,19 @@
 from typing import Any
+from typing import FrozenSet
 from typing import Set
 from typing import TYPE_CHECKING
 from typing import Tuple
 
-from ddtrace.contrib.trace_utils import flatten_dict
+from ddtrace.contrib.trace_utils import set_flattened_tags
 
 
 if TYPE_CHECKING:
     from ddtrace.span import Span
 
 
-EXCLUDED_ENDPOINT = {"kms", "sts"}
+EXCLUDED_ENDPOINT = frozenset({"kms", "sts"})
 EXCLUDED_ENDPOINT_TAGS = {
-    "s3": {"params.Body"},
-    "firehose": {"params.Records"},
+    "firehose": frozenset({"params.Records"}),
 }
 
 
@@ -37,9 +37,13 @@ def add_span_arg_tags(
 ):
     # type: (...) -> None
     if endpoint_name not in EXCLUDED_ENDPOINT:
-        tags = dict((name, value) for (name, value) in zip(args_names, args) if name in args_traced)
-        flat_tags = flatten_dict(tags, exclude=EXCLUDED_ENDPOINT_TAGS.get(endpoint_name))
-        span.set_tags({k: truncate_arg_value(v) for k, v in flat_tags.items()})
+        exclude_set = EXCLUDED_ENDPOINT_TAGS.get(endpoint_name, frozenset())  # type: FrozenSet[str]
+        set_flattened_tags(
+            span,
+            items=((name, value) for (name, value) in zip(args_names, args) if name in args_traced),
+            exclude_policy=lambda tag: tag in exclude_set or tag.endswith("Body"),
+            processor=truncate_arg_value,
+        )
 
 
 REGION = "aws.region"
diff --git a/releasenotes/notes/botocore-no-body-params-a440cd8b7fb9bd68.yaml b/releasenotes/notes/botocore-no-body-params-a440cd8b7fb9bd68.yaml
@@ -0,0 +1,5 @@
+---
+other:
+  - |
+    The botocore integration excludes AWS endpoint call parameters that have a
+    name ending with ``Body`` from the set of span tags.
diff --git a/tests/benchmarks/test_trace_utils.py b/tests/benchmarks/test_trace_utils.py
diff --git a/tests/contrib/botocore/test.py b/tests/contrib/botocore/test.py
@@ -171,6 +171,7 @@ def test_sqs_client(self):
         self.assertEqual(len(spans), 1)
         self.assertEqual(span.get_tag("aws.region"), "us-east-1")
         self.assertEqual(span.get_tag("aws.operation"), "ListQueues")
+        self.assertIsNone(span.get_tag("params.MessageBody"))
         assert_is_measured(span)
         assert_span_http_status_code(span, 200)
         self.assertEqual(span.service, "test-botocore-tracing.sqs")
@@ -189,6 +190,7 @@ def test_sqs_send_message_trace_injection_with_no_message_attributes(self):
         self.assertEqual(len(spans), 1)
         self.assertEqual(span.get_tag("aws.region"), "us-east-1")
         self.assertEqual(span.get_tag("aws.operation"), "SendMessage")
+        self.assertIsNone(span.get_tag("params.MessageBody"))
         assert_is_measured(span)
         assert_span_http_status_code(span, 200)
         self.assertEqual(span.service, "test-botocore-tracing.sqs")
@@ -219,6 +221,7 @@ def test_sqs_send_message_distributed_tracing_off(self):
             self.assertEqual(len(spans), 1)
             self.assertEqual(span.get_tag("aws.region"), "us-east-1")
             self.assertEqual(span.get_tag("aws.operation"), "SendMessage")
+            self.assertIsNone(span.get_tag("params.MessageBody"))
             assert_is_measured(span)
             assert_span_http_status_code(span, 200)
             self.assertEqual(span.service, "test-botocore-tracing.sqs")
@@ -254,6 +257,7 @@ def test_sqs_send_message_trace_injection_with_message_attributes(self):
         self.assertEqual(len(spans), 1)
         self.assertEqual(span.get_tag("aws.region"), "us-east-1")
         self.assertEqual(span.get_tag("aws.operation"), "SendMessage")
+        self.assertIsNone(span.get_tag("params.MessageBody"))
         assert_is_measured(span)
         assert_span_http_status_code(span, 200)
         self.assertEqual(span.service, "test-botocore-tracing.sqs")
@@ -294,6 +298,7 @@ def test_sqs_send_message_trace_injection_with_max_message_attributes(self):
         self.assertEqual(len(spans), 1)
         self.assertEqual(span.get_tag("aws.region"), "us-east-1")
         self.assertEqual(span.get_tag("aws.operation"), "SendMessage")
+        self.assertIsNone(span.get_tag("params.MessageBody"))
         assert_is_measured(span)
         assert_span_http_status_code(span, 200)
         self.assertEqual(span.service, "test-botocore-tracing.sqs")
@@ -324,6 +329,7 @@ def test_sqs_send_message_batch_trace_injection_with_no_message_attributes(self)
         self.assertEqual(len(spans), 1)
         self.assertEqual(span.get_tag("aws.region"), "us-east-1")
         self.assertEqual(span.get_tag("aws.operation"), "SendMessageBatch")
+        self.assertIsNone(span.get_tag("params.MessageBody"))
         assert_is_measured(span)
         assert_span_http_status_code(span, 200)
         self.assertEqual(span.service, "test-botocore-tracing.sqs")
@@ -366,6 +372,7 @@ def test_sqs_send_message_batch_trace_injection_with_message_attributes(self):
         self.assertEqual(len(spans), 1)
         self.assertEqual(span.get_tag("aws.region"), "us-east-1")
         self.assertEqual(span.get_tag("aws.operation"), "SendMessageBatch")
+        self.assertIsNone(span.get_tag("params.MessageBody"))
         assert_is_measured(span)
         assert_span_http_status_code(span, 200)
         self.assertEqual(span.service, "test-botocore-tracing.sqs")
@@ -409,6 +416,7 @@ def test_sqs_send_message_batch_trace_injection_with_max_message_attributes(self
         self.assertEqual(len(spans), 1)
         self.assertEqual(span.get_tag("aws.region"), "us-east-1")
         self.assertEqual(span.get_tag("aws.operation"), "SendMessageBatch")
+        self.assertIsNone(span.get_tag("params.MessageBody"))
         assert_is_measured(span)
         assert_span_http_status_code(span, 200)
         self.assertEqual(span.service, "test-botocore-tracing.sqs")
diff --git a/tests/tracer/test_trace_utils.py b/tests/tracer/test_trace_utils.py
@@ -6,6 +6,7 @@
 from hypothesis.strategies import none
 from hypothesis.strategies import recursive
 from hypothesis.strategies import text
+from hypothesis.strategies import tuples
 import mock
 import pytest
 
@@ -508,30 +509,43 @@ def test_sanitized_url_in_http_meta(span, int_config):
     assert span.meta[http.URL] == FULL_URL
 
 
-nested_dicts = recursive(
-    none() | booleans() | floats() | text(),
-    lambda children: lists(children, min_size=1) | dictionaries(text(), children, min_size=1),
-    max_leaves=10,
+# This generates a list of (key, value) tuples, with values given by nested
+# dictionaries
+@given(
+    lists(
+        tuples(
+            text(),
+            recursive(
+                none() | booleans() | floats() | text(),
+                lambda children: lists(children, min_size=1) | dictionaries(text(), children, min_size=1),
+                max_leaves=10,
+            ),
+        ),
+        max_size=4,
+    )
 )
-
-
-@given(nested_dicts)
-def test_flatten_dict_is_flat(d):
+def test_set_flattened_tags_is_flat(items):
     """Ensure that flattening of a nested dict results in a normalized, 1-level dict"""
-    f = trace_utils.flatten_dict(d)
-    assert isinstance(f, dict)
-    assert not any(isinstance(v, dict) for v in f.values())
+    span = Span(None, "test")
+    trace_utils.set_flattened_tags(span, items)
+    assert isinstance(span.meta, dict)
+    assert not any(isinstance(v, dict) for v in span.meta.values())
 
 
-def test_flatten_dict_keys():
+def test_set_flattened_tags_keys():
     """Ensure expected keys in flattened dictionary"""
     d = dict(A=1, B=2, C=dict(A=3, B=4, C=dict(A=5, B=6)))
     e = dict(A=1, B=2, C_A=3, C_B=4, C_C_A=5, C_C_B=6)
-    assert trace_utils.flatten_dict(d, sep="_") == e
+    span = Span(None, "test")
+    trace_utils.set_flattened_tags(span, d.items(), sep="_")
+    assert span.metrics == e
 
 
-def test_flatten_dict_exclude():
+def test_set_flattened_tags_exclude_policy():
     """Ensure expected keys in flattened dictionary with exclusion set"""
     d = dict(A=1, B=2, C=dict(A=3, B=4, C=dict(A=5, B=6)))
     e = dict(A=1, B=2, C_B=4)
-    assert trace_utils.flatten_dict(d, sep="_", exclude={"C_A", "C_C"}) == e
+    span = Span(None, "test")
+
+    trace_utils.set_flattened_tags(span, d.items(), sep="_", exclude_policy=lambda tag: tag in {"C_A", "C_C"})
+    assert span.metrics == e

-Original file line number
+Diff line change
@@ @@ -0,0 +1,5 @@ @@
 +---
 +other:
 +  - |
 +    The botocore integration excludes AWS endpoint call parameters that have a
 +    name ending with ``Body`` from the set of span tags.