Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
30 changes: 30 additions & 0 deletions Lib/test/test_json/test_dump.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,36 @@ def __lt__(self, o):
d[1337] = "true.dat"
self.assertEqual(self.dumps(d, sort_keys=True), '{"1337": "true.dat"}')

def test_dumps_str_subclass(self):
# Don't call obj.__str__() on str subclasses

# str subclass which returns a different string on str(obj)
class StrSubclass(str):
def __str__(self):
return "StrSubclass"

obj = StrSubclass('ascii')
self.assertEqual(self.dumps(obj), '"ascii"')
self.assertEqual(self.dumps([obj]), '["ascii"]')
self.assertEqual(self.dumps({'key': obj}), '{"key": "ascii"}')

obj = StrSubclass('escape\n')
self.assertEqual(self.dumps(obj), '"escape\\n"')
self.assertEqual(self.dumps([obj]), '["escape\\n"]')
self.assertEqual(self.dumps({'key': obj}), '{"key": "escape\\n"}')

obj = StrSubclass('nonascii:é')
self.assertEqual(self.dumps(obj, ensure_ascii=False),
'"nonascii:é"')
self.assertEqual(self.dumps([obj], ensure_ascii=False),
'["nonascii:é"]')
self.assertEqual(self.dumps({'key': obj}, ensure_ascii=False),
'{"key": "nonascii:é"}')
self.assertEqual(self.dumps(obj), '"nonascii:\\u00e9"')
self.assertEqual(self.dumps([obj]), '["nonascii:\\u00e9"]')
self.assertEqual(self.dumps({'key': obj}),
'{"key": "nonascii:\\u00e9"}')


class TestPyDump(TestDump, PyTest):
    # Concrete test case: the TestDump checks combined with the PyTest mixin
    # (PyTest is defined outside this view; presumably it binds the
    # pure-Python json implementation -- confirm against test.test_json).
    pass

Expand Down
7 changes: 7 additions & 0 deletions Lib/test/test_json/test_encode_basestring_ascii.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,11 @@
from test.support import bigaddrspacetest


# str subclass which returns a different string on str(obj)
class StrSubclass(str):
    """str whose __str__() does not return the underlying string value.

    Used as an input in CASES so that an encoder which serializes
    str(obj) instead of the string's own contents is detected.
    """

    def __str__(self):
        return "StrSubclass"

CASES = [
('/\\"\ucafe\ubabe\uab98\ufcde\ubcda\uef4a\x08\x0c\n\r\t`1~!@#$%^&*()_+-=[]{}|;:\',./<>?', '"/\\\\\\"\\ucafe\\ubabe\\uab98\\ufcde\\ubcda\\uef4a\\b\\f\\n\\r\\t`1~!@#$%^&*()_+-=[]{}|;:\',./<>?"'),
('\u0123\u4567\u89ab\ucdef\uabcd\uef4a', '"\\u0123\\u4567\\u89ab\\ucdef\\uabcd\\uef4a"'),
Expand All @@ -14,6 +19,8 @@
('\U0001d120', '"\\ud834\\udd20"'),
('\u03b1\u03a9', '"\\u03b1\\u03a9"'),
("`1~!@#$%^&*()_+-={':[,]}|;.</>?", '"`1~!@#$%^&*()_+-={\':[,]}|;.</>?"'),
# Don't call obj.__str__() on str subclasses
(StrSubclass('ascii'), '"ascii"'),
]

class TestEncodeBasestringAscii:
Expand Down
9 changes: 9 additions & 0 deletions Lib/test/test_json/test_enum.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,9 @@ class WeirdNum(float, Enum):
neg_inf = NEG_INF
nan = NAN

class StringEnum(str, Enum):
    """Enum with a str mixin: each member is also a str instance."""
    COLOR = "color"

class TestEnum:

def test_floats(self):
Expand Down Expand Up @@ -116,5 +119,11 @@ def test_dict_values(self):
self.assertEqual(nd['j'], NEG_INF)
self.assertTrue(isnan(nd['n']))

def test_str_enum(self):
    """Check that a str-mixin Enum member is dumped as its string value.

    The member is serialized bare, inside a list and as a dict value.
    """
    obj = StringEnum.COLOR
    self.assertEqual(self.dumps(obj), '"color"')
    self.assertEqual(self.dumps([obj]), '["color"]')
    self.assertEqual(self.dumps({'key': obj}), '{"key": "color"}')

class TestPyEnum(TestEnum, PyTest):
    # Concrete test case: the TestEnum checks combined with the PyTest mixin
    # (defined outside this view).
    pass
class TestCEnum(TestEnum, CTest):
    # Concrete test case: the TestEnum checks combined with the CTest mixin
    # (defined outside this view).
    pass
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
:mod:`json`: Fix serialization: no longer call ``str(obj)`` on :class:`str`
subclasses. Patch by Victor Stinner.
9 changes: 7 additions & 2 deletions Modules/_json.c
Original file line number Diff line number Diff line change
Expand Up @@ -258,7 +258,10 @@ write_escaped_ascii(PyUnicodeWriter *writer, PyObject *pystr)
if (PyUnicodeWriter_WriteChar(writer, '"') < 0) {
return -1;
}
if (PyUnicodeWriter_WriteStr(writer, pystr) < 0) {
// gh-148241: Avoid PyUnicodeWriter_WriteStr() which calls str(obj)
// on str subclasses
assert(PyUnicode_IS_ASCII(pystr));
if (PyUnicodeWriter_WriteASCII(writer, input, input_chars) < 0) {
return -1;
}
return PyUnicodeWriter_WriteChar(writer, '"');
Expand Down Expand Up @@ -399,7 +402,9 @@ write_escaped_unicode(PyUnicodeWriter *writer, PyObject *pystr)
if (PyUnicodeWriter_WriteChar(writer, '"') < 0) {
return -1;
}
if (PyUnicodeWriter_WriteStr(writer, pystr) < 0) {
// gh-148241: Avoid PyUnicodeWriter_WriteStr() which calls str(obj)
// on str subclasses
if (_PyUnicodeWriter_WriteStr((_PyUnicodeWriter*)writer, pystr) < 0) {
Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

An alternative to using the private API (which is bad!) is to call PyUnicode_FromObject(). But PyUnicode_FromObject() has to copy the string, so it's less efficient.

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

See also the issue gh-148250 that I just created for this issue.

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

using the private API

It would raise a deprecated warning.

_Py_DEPRECATED_EXTERNALLY(3.14) PyAPI_FUNC(int) _PyUnicodeWriter_WriteStr(
    _PyUnicodeWriter *writer,
    PyObject *str); 

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is it safe to cast PyUnicodeWriter* to _PyUnicodeWriter*?

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, it's safe. That's how PyUnicodeWriter_WriteStr() is implemented: by casting the first argument to _PyUnicodeWriter* and calling _PyUnicodeWriter_WriteStr().

Do you prefer calling PyUnicode_FromObject() to avoid the private API?

It would raise a deprecated warning.

_Py_DEPRECATED_EXTERNALLY() only emits a deprecation warning if the Py_BUILD_CORE macro is not defined. This macro is defined in Modules/_json.c, so no warning is emitted.

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is it safe to cast PyUnicodeWriter* to _PyUnicodeWriter*?

Yes. I checked, PyUnicodeWriter is an opaque type that is always cast to _PyUnicodeWriter before use.

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I am in favor of the public API.

Is PyUnicodeWriter_WriteSubstring applicable for this?

PyUnicodeWriter_WriteSubstring is the only PyUnicodeWriter public API that treats str subclasses as str.

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It is fine. We already use private API in this file.

return -1;
}
return PyUnicodeWriter_WriteChar(writer, '"');
Expand Down
Loading