Skip to content

Commit 527fa21

Browse files
P403n1x87brettlangdonmergify[bot]
authored
perf(botocore): avoid creating interim dicts (#2589)
The current implementation of the AWS helper for setting tags creates three intermediate dictionaries. The proposed changes does away with them by generating tag names and values from the available data. It also excludes the params.MessageBody tag from the sqs endpoint, which could contain big strings that are costy to encode and send over to the agent. Co-authored-by: Brett Langdon <[email protected]> Co-authored-by: mergify[bot] <37929162+mergify[bot]@users.noreply.github.com>
1 parent dae36ec commit 527fa21

File tree

7 files changed

+87
-110
lines changed

7 files changed

+87
-110
lines changed

ddtrace/contrib/botocore/patch.py

Lines changed: 9 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -130,7 +130,10 @@ def patched_api_call(original_func, instance, args, kwargs):
130130
operation = None
131131
if args:
132132
operation = args[0]
133-
span.resource = "%s.%s" % (endpoint_name, operation.lower())
133+
# DEV: join is the fastest way of concatenating strings that is compatible
134+
# across Python versions (see
135+
# https://stackoverflow.com/questions/1316887/what-is-the-most-efficient-string-concatenation-method-in-python)
136+
span.resource = ".".join((endpoint_name, operation.lower()))
134137

135138
if config.botocore["distributed_tracing"]:
136139
if endpoint_name == "lambda" and operation == "Invoke":
@@ -147,12 +150,11 @@ def patched_api_call(original_func, instance, args, kwargs):
147150

148151
region_name = deep_getattr(instance, "meta.region_name")
149152

150-
meta = {
151-
"aws.agent": "botocore",
152-
"aws.operation": operation,
153-
"aws.region": region_name,
154-
}
155-
span.set_tags(meta)
153+
span._set_str_tag("aws.agent", "botocore")
154+
if operation is not None:
155+
span._set_str_tag("aws.operation", operation)
156+
if region_name is not None:
157+
span._set_str_tag("aws.region", region_name)
156158

157159
result = original_func(*args, **kwargs)
158160

ddtrace/contrib/trace_utils.py

Lines changed: 25 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -4,10 +4,13 @@
44
from collections import deque
55
import re
66
from typing import Any
7+
from typing import Callable
78
from typing import Dict
9+
from typing import Generator
10+
from typing import Iterator
811
from typing import Optional
9-
from typing import Set
1012
from typing import TYPE_CHECKING
13+
from typing import Tuple
1114

1215
from ddtrace import Pin
1316
from ddtrace import config
@@ -279,26 +282,33 @@ def activate_distributed_headers(tracer, int_config=None, request_headers=None,
279282
tracer.context_provider.activate(context)
280283

281284

282-
def flatten_dict(
283-
d, # type: Dict[str, Any]
285+
def _flatten(
286+
obj, # type: Any
284287
sep=".", # type: str
285288
prefix="", # type: str
286-
exclude=None, # type: Optional[Set[str]]
289+
exclude_policy=None, # type: Optional[Callable[[str], bool]]
287290
):
288-
# type: (...) -> Dict[str, Any]
289-
"""
290-
Returns a normalized dict of depth 1
291-
"""
292-
flat = {}
291+
# type: (...) -> Generator[Tuple[str, Any], None, None]
293292
s = deque() # type: ignore
294-
s.append((prefix, d))
295-
exclude = exclude or set()
293+
s.append((prefix, obj))
296294
while s:
297295
p, v = s.pop()
298-
if p in exclude:
296+
if exclude_policy is not None and exclude_policy(p):
299297
continue
300298
if isinstance(v, dict):
301-
s.extend((p + sep + k if p else k, v) for k, v in v.items())
299+
s.extend((sep.join((p, k)) if p else k, v) for k, v in v.items())
302300
else:
303-
flat[p] = v
304-
return flat
301+
yield p, v
302+
303+
304+
def set_flattened_tags(
305+
span, # type: Span
306+
items, # type: Iterator[Tuple[str, Any]]
307+
sep=".", # type: str
308+
exclude_policy=None, # type: Optional[Callable[[str], bool]]
309+
processor=None, # type Optional[Callable[[Any], Any]]
310+
):
311+
# type: (...) -> None
312+
for prefix, value in items:
313+
for tag, v in _flatten(value, sep, prefix, exclude_policy):
314+
span.set_tag(tag, processor(v) if processor is not None else v)

ddtrace/ext/aws.py

Lines changed: 11 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,19 +1,19 @@
11
from typing import Any
2+
from typing import FrozenSet
23
from typing import Set
34
from typing import TYPE_CHECKING
45
from typing import Tuple
56

6-
from ddtrace.contrib.trace_utils import flatten_dict
7+
from ddtrace.contrib.trace_utils import set_flattened_tags
78

89

910
if TYPE_CHECKING:
1011
from ddtrace.span import Span
1112

1213

13-
EXCLUDED_ENDPOINT = {"kms", "sts"}
14+
EXCLUDED_ENDPOINT = frozenset({"kms", "sts"})
1415
EXCLUDED_ENDPOINT_TAGS = {
15-
"s3": {"params.Body"},
16-
"firehose": {"params.Records"},
16+
"firehose": frozenset({"params.Records"}),
1717
}
1818

1919

@@ -37,9 +37,13 @@ def add_span_arg_tags(
3737
):
3838
# type: (...) -> None
3939
if endpoint_name not in EXCLUDED_ENDPOINT:
40-
tags = dict((name, value) for (name, value) in zip(args_names, args) if name in args_traced)
41-
flat_tags = flatten_dict(tags, exclude=EXCLUDED_ENDPOINT_TAGS.get(endpoint_name))
42-
span.set_tags({k: truncate_arg_value(v) for k, v in flat_tags.items()})
40+
exclude_set = EXCLUDED_ENDPOINT_TAGS.get(endpoint_name, frozenset()) # type: FrozenSet[str]
41+
set_flattened_tags(
42+
span,
43+
items=((name, value) for (name, value) in zip(args_names, args) if name in args_traced),
44+
exclude_policy=lambda tag: tag in exclude_set or tag.endswith("Body"),
45+
processor=truncate_arg_value,
46+
)
4347

4448

4549
REGION = "aws.region"
Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
---
2+
other:
3+
- |
4+
The botocore integration excludes AWS endpoint call parameters that have a
5+
name ending with ``Body`` from the set of span tags.

tests/benchmarks/test_trace_utils.py

Lines changed: 0 additions & 66 deletions
This file was deleted.

tests/contrib/botocore/test.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -171,6 +171,7 @@ def test_sqs_client(self):
171171
self.assertEqual(len(spans), 1)
172172
self.assertEqual(span.get_tag("aws.region"), "us-east-1")
173173
self.assertEqual(span.get_tag("aws.operation"), "ListQueues")
174+
self.assertIsNone(span.get_tag("params.MessageBody"))
174175
assert_is_measured(span)
175176
assert_span_http_status_code(span, 200)
176177
self.assertEqual(span.service, "test-botocore-tracing.sqs")
@@ -189,6 +190,7 @@ def test_sqs_send_message_trace_injection_with_no_message_attributes(self):
189190
self.assertEqual(len(spans), 1)
190191
self.assertEqual(span.get_tag("aws.region"), "us-east-1")
191192
self.assertEqual(span.get_tag("aws.operation"), "SendMessage")
193+
self.assertIsNone(span.get_tag("params.MessageBody"))
192194
assert_is_measured(span)
193195
assert_span_http_status_code(span, 200)
194196
self.assertEqual(span.service, "test-botocore-tracing.sqs")
@@ -219,6 +221,7 @@ def test_sqs_send_message_distributed_tracing_off(self):
219221
self.assertEqual(len(spans), 1)
220222
self.assertEqual(span.get_tag("aws.region"), "us-east-1")
221223
self.assertEqual(span.get_tag("aws.operation"), "SendMessage")
224+
self.assertIsNone(span.get_tag("params.MessageBody"))
222225
assert_is_measured(span)
223226
assert_span_http_status_code(span, 200)
224227
self.assertEqual(span.service, "test-botocore-tracing.sqs")
@@ -254,6 +257,7 @@ def test_sqs_send_message_trace_injection_with_message_attributes(self):
254257
self.assertEqual(len(spans), 1)
255258
self.assertEqual(span.get_tag("aws.region"), "us-east-1")
256259
self.assertEqual(span.get_tag("aws.operation"), "SendMessage")
260+
self.assertIsNone(span.get_tag("params.MessageBody"))
257261
assert_is_measured(span)
258262
assert_span_http_status_code(span, 200)
259263
self.assertEqual(span.service, "test-botocore-tracing.sqs")
@@ -294,6 +298,7 @@ def test_sqs_send_message_trace_injection_with_max_message_attributes(self):
294298
self.assertEqual(len(spans), 1)
295299
self.assertEqual(span.get_tag("aws.region"), "us-east-1")
296300
self.assertEqual(span.get_tag("aws.operation"), "SendMessage")
301+
self.assertIsNone(span.get_tag("params.MessageBody"))
297302
assert_is_measured(span)
298303
assert_span_http_status_code(span, 200)
299304
self.assertEqual(span.service, "test-botocore-tracing.sqs")
@@ -324,6 +329,7 @@ def test_sqs_send_message_batch_trace_injection_with_no_message_attributes(self)
324329
self.assertEqual(len(spans), 1)
325330
self.assertEqual(span.get_tag("aws.region"), "us-east-1")
326331
self.assertEqual(span.get_tag("aws.operation"), "SendMessageBatch")
332+
self.assertIsNone(span.get_tag("params.MessageBody"))
327333
assert_is_measured(span)
328334
assert_span_http_status_code(span, 200)
329335
self.assertEqual(span.service, "test-botocore-tracing.sqs")
@@ -366,6 +372,7 @@ def test_sqs_send_message_batch_trace_injection_with_message_attributes(self):
366372
self.assertEqual(len(spans), 1)
367373
self.assertEqual(span.get_tag("aws.region"), "us-east-1")
368374
self.assertEqual(span.get_tag("aws.operation"), "SendMessageBatch")
375+
self.assertIsNone(span.get_tag("params.MessageBody"))
369376
assert_is_measured(span)
370377
assert_span_http_status_code(span, 200)
371378
self.assertEqual(span.service, "test-botocore-tracing.sqs")
@@ -409,6 +416,7 @@ def test_sqs_send_message_batch_trace_injection_with_max_message_attributes(self
409416
self.assertEqual(len(spans), 1)
410417
self.assertEqual(span.get_tag("aws.region"), "us-east-1")
411418
self.assertEqual(span.get_tag("aws.operation"), "SendMessageBatch")
419+
self.assertIsNone(span.get_tag("params.MessageBody"))
412420
assert_is_measured(span)
413421
assert_span_http_status_code(span, 200)
414422
self.assertEqual(span.service, "test-botocore-tracing.sqs")

tests/tracer/test_trace_utils.py

Lines changed: 29 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
from hypothesis.strategies import none
77
from hypothesis.strategies import recursive
88
from hypothesis.strategies import text
9+
from hypothesis.strategies import tuples
910
import mock
1011
import pytest
1112

@@ -508,30 +509,43 @@ def test_sanitized_url_in_http_meta(span, int_config):
508509
assert span.meta[http.URL] == FULL_URL
509510

510511

511-
nested_dicts = recursive(
512-
none() | booleans() | floats() | text(),
513-
lambda children: lists(children, min_size=1) | dictionaries(text(), children, min_size=1),
514-
max_leaves=10,
512+
# This generates a list of (key, value) tuples, with values given by nested
513+
# dictionaries
514+
@given(
515+
lists(
516+
tuples(
517+
text(),
518+
recursive(
519+
none() | booleans() | floats() | text(),
520+
lambda children: lists(children, min_size=1) | dictionaries(text(), children, min_size=1),
521+
max_leaves=10,
522+
),
523+
),
524+
max_size=4,
525+
)
515526
)
516-
517-
518-
@given(nested_dicts)
519-
def test_flatten_dict_is_flat(d):
527+
def test_set_flattened_tags_is_flat(items):
520528
"""Ensure that flattening of a nested dict results in a normalized, 1-level dict"""
521-
f = trace_utils.flatten_dict(d)
522-
assert isinstance(f, dict)
523-
assert not any(isinstance(v, dict) for v in f.values())
529+
span = Span(None, "test")
530+
trace_utils.set_flattened_tags(span, items)
531+
assert isinstance(span.meta, dict)
532+
assert not any(isinstance(v, dict) for v in span.meta.values())
524533

525534

526-
def test_flatten_dict_keys():
535+
def test_set_flattened_tags_keys():
527536
"""Ensure expected keys in flattened dictionary"""
528537
d = dict(A=1, B=2, C=dict(A=3, B=4, C=dict(A=5, B=6)))
529538
e = dict(A=1, B=2, C_A=3, C_B=4, C_C_A=5, C_C_B=6)
530-
assert trace_utils.flatten_dict(d, sep="_") == e
539+
span = Span(None, "test")
540+
trace_utils.set_flattened_tags(span, d.items(), sep="_")
541+
assert span.metrics == e
531542

532543

533-
def test_flatten_dict_exclude():
544+
def test_set_flattened_tags_exclude_policy():
534545
"""Ensure expected keys in flattened dictionary with exclusion set"""
535546
d = dict(A=1, B=2, C=dict(A=3, B=4, C=dict(A=5, B=6)))
536547
e = dict(A=1, B=2, C_B=4)
537-
assert trace_utils.flatten_dict(d, sep="_", exclude={"C_A", "C_C"}) == e
548+
span = Span(None, "test")
549+
550+
trace_utils.set_flattened_tags(span, d.items(), sep="_", exclude_policy=lambda tag: tag in {"C_A", "C_C"})
551+
assert span.metrics == e

0 commit comments

Comments
 (0)