Skip to content

Commit 3a04de7

Browse files
authored
fix: decode bytes strings when using JSON encoder (backport #3117) (#3177)
This is an automatic backport of pull request #3117 done by [Mergify](https://mergify.com).

---

<details>
<summary>Mergify commands and options</summary>

<br />

More conditions and actions can be found in the [documentation](https://docs.mergify.com/).

You can also trigger Mergify actions by commenting on this pull request:

- `@Mergifyio refresh` will re-evaluate the rules
- `@Mergifyio rebase` will rebase this PR on its base branch
- `@Mergifyio update` will merge the base branch into this PR
- `@Mergifyio backport <destination>` will backport this PR on `<destination>` branch

Additionally, on Mergify [dashboard](https://dashboard.mergify.com/) you can:

- look at your merge queues
- generate the Mergify configuration with the config editor.

Finally, you can contact us on https://mergify.com

</details>
1 parent 4796e69 commit 3a04de7

File tree

3 files changed: 110 additions and 25 deletions.

ddtrace/internal/encoding.py

Lines changed: 25 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,9 @@
88
from ._encoding import ListStringTable
99
from ._encoding import MsgpackEncoderV03
1010
from ._encoding import MsgpackEncoderV05
11+
from .compat import PY3
12+
from .compat import binary_type
13+
from .compat import ensure_text
1114
from .logger import get_logger
1215

1316

@@ -48,17 +51,33 @@ def encode(self, obj):
4851
raise NotImplementedError()
4952

5053

51-
class JSONEncoder(_EncoderBase):
54+
class JSONEncoder(json.JSONEncoder, _EncoderBase):
5255
content_type = "application/json"
5356

5457
def encode_traces(self, traces):
55-
normalized_traces = [[span.to_dict() for span in trace] for trace in traces]
58+
normalized_traces = [[JSONEncoder._normalize_span(span.to_dict()) for span in trace] for trace in traces]
5659
return self.encode(normalized_traces)
5760

5861
@staticmethod
59-
def encode(obj):
60-
# type: (Any) -> str
61-
return json.dumps(obj)
62+
def _normalize_span(span):
63+
# Ensure all string attributes are actually strings and not bytes
64+
# DEV: We are deferring meta/metrics to reduce any performance issues.
65+
# Meta/metrics may still contain `bytes` and have encoding issues.
66+
span["resource"] = JSONEncoder._normalize_str(span["resource"])
67+
span["name"] = JSONEncoder._normalize_str(span["name"])
68+
span["service"] = JSONEncoder._normalize_str(span["service"])
69+
return span
70+
71+
@staticmethod
72+
def _normalize_str(obj):
73+
if obj is None:
74+
return obj
75+
76+
if PY3:
77+
return ensure_text(obj, errors="backslashreplace")
78+
elif isinstance(obj, binary_type):
79+
return obj.decode("utf-8", errors="replace")
80+
return obj
6281

6382

6483
class JSONEncoderV2(JSONEncoder):
@@ -77,6 +96,7 @@ def encode_traces(self, traces):
7796
def _convert_span(span):
7897
# type: (Span) -> Dict[str, Any]
7998
sp = span.to_dict()
99+
sp = JSONEncoderV2._normalize_span(sp)
80100
sp["trace_id"] = JSONEncoderV2._encode_id_to_hex(sp.get("trace_id"))
81101
sp["parent_id"] = JSONEncoderV2._encode_id_to_hex(sp.get("parent_id"))
82102
sp["span_id"] = JSONEncoderV2._encode_id_to_hex(sp.get("span_id"))
Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
---
2+
fixes:
3+
- |
4+
Fix JSON encoding error when a ``bytes`` string is used for a span's name, resource, or service.

tests/tracer/test_encoders.py

Lines changed: 81 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -161,19 +161,20 @@ class TestEncoders(TestCase):
161161

162162
def test_encode_traces_json(self):
163163
# test encoding for JSON format
164-
traces = []
165-
traces.append(
164+
traces = [
166165
[
167166
Span(name="client.testing", tracer=None),
168167
Span(name="client.testing", tracer=None),
169-
]
170-
)
171-
traces.append(
168+
],
172169
[
173170
Span(name="client.testing", tracer=None),
174171
Span(name="client.testing", tracer=None),
175-
]
176-
)
172+
],
173+
[
174+
Span(name=b"client.testing", tracer=None),
175+
Span(name=b"client.testing", tracer=None),
176+
],
177+
]
177178

178179
encoder = JSONEncoder()
179180
spans = encoder.encode_traces(traces)
@@ -182,39 +183,42 @@ def test_encode_traces_json(self):
182183
# test the encoded output that should be a string
183184
# and the output must be flattened
184185
assert isinstance(spans, string_type)
185-
assert len(items) == 2
186+
assert len(items) == 3
186187
assert len(items[0]) == 2
187188
assert len(items[1]) == 2
188-
for i in range(2):
189+
assert len(items[2]) == 2
190+
for i in range(3):
189191
for j in range(2):
190192
assert "client.testing" == items[i][j]["name"]
191193

192194
def test_encode_traces_json_v2(self):
193195
# test encoding for JSON format
194-
traces = []
195-
traces.append(
196+
traces = [
196197
[
197198
Span(name="client.testing", tracer=None, span_id=0xAAAAAA),
198199
Span(name="client.testing", tracer=None, span_id=0xAAAAAA),
199-
]
200-
)
201-
traces.append(
200+
],
202201
[
203202
Span(name="client.testing", tracer=None, span_id=0xAAAAAA),
204203
Span(name="client.testing", tracer=None, span_id=0xAAAAAA),
205-
]
206-
)
204+
],
205+
[
206+
Span(name=b"client.testing", tracer=None, span_id=0xAAAAAA),
207+
Span(name=b"client.testing", tracer=None, span_id=0xAAAAAA),
208+
],
209+
]
207210

208211
encoder = JSONEncoderV2()
209212
spans = encoder.encode_traces(traces)
210213
items = json.loads(spans)["traces"]
211214
# test the encoded output that should be a string
212215
# and the output must be flattened
213216
assert isinstance(spans, string_type)
214-
assert len(items) == 2
217+
assert len(items) == 3
215218
assert len(items[0]) == 2
216219
assert len(items[1]) == 2
217-
for i in range(2):
220+
assert len(items[2]) == 2
221+
for i in range(3):
218222
for j in range(2):
219223
assert "client.testing" == items[i][j]["name"]
220224
assert isinstance(items[i][j]["span_id"], string_type)
@@ -235,17 +239,24 @@ def test_encode_traces_msgpack_v03(self):
235239
Span(name="client.testing", tracer=None),
236240
]
237241
)
242+
encoder.put(
243+
[
244+
Span(name=b"client.testing", tracer=None),
245+
Span(name=b"client.testing", tracer=None),
246+
]
247+
)
238248

239249
spans = encoder.encode()
240250
items = encoder._decode(spans)
241251

242252
# test the encoded output that should be a string
243253
# and the output must be flatten
244254
assert isinstance(spans, msgpack_type)
245-
assert len(items) == 2
255+
assert len(items) == 3
246256
assert len(items[0]) == 2
247257
assert len(items[1]) == 2
248-
for i in range(2):
258+
assert len(items[2]) == 2
259+
for i in range(3):
249260
for j in range(2):
250261
assert b"client.testing" == items[i][j][b"name"]
251262

@@ -628,3 +639,53 @@ def run(self):
628639

629640
unpacked = decode(encoder.encode(), reconstruct=True)
630641
assert unpacked is not None
642+
643+
644+
@pytest.mark.parametrize("encoder_cls", ["JSONEncoder", "JSONEncoderV2"])
645+
def test_json_encoder_traces_bytes(encoder_cls, run_python_code_in_subprocess):
646+
"""
647+
Regression test for: https://github.com/DataDog/dd-trace-py/issues/3115
648+
649+
Ensure we properly decode `bytes` objects when encoding with the JSONEncoder
650+
"""
651+
# Run test in a subprocess to test without setting file encoding to utf-8
652+
code = """
653+
import json
654+
655+
from ddtrace.internal.compat import PY3
656+
from ddtrace.internal.encoding import {0}
657+
from ddtrace.span import Span
658+
659+
encoder = {0}()
660+
data = encoder.encode_traces(
661+
[
662+
[
663+
Span(name=b"\\x80span.a", tracer=None),
664+
Span(name=u"\\x80span.b", tracer=None),
665+
Span(name="\\x80span.b", tracer=None),
666+
]
667+
]
668+
)
669+
traces = json.loads(data)
670+
if "{0}" == "JSONEncoderV2":
671+
traces = traces["traces"]
672+
673+
assert len(traces) == 1
674+
span_a, span_b, span_c = traces[0]
675+
676+
if PY3:
677+
assert "\\\\x80span.a" == span_a["name"]
678+
assert u"\\x80span.b" == span_b["name"]
679+
assert u"\\x80span.b" == span_c["name"]
680+
else:
681+
assert u"\\ufffdspan.a" == span_a["name"]
682+
assert u"\\x80span.b" == span_b["name"]
683+
assert u"\\ufffdspan.b" == span_c["name"]
684+
""".format(
685+
encoder_cls
686+
)
687+
688+
out, err, status, pid = run_python_code_in_subprocess(code)
689+
assert status == 0, err
690+
assert out == b""
691+
assert err == b""

0 commit comments

Comments
 (0)