Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 11 additions & 2 deletions Doc/library/json.rst
Original file line number Diff line number Diff line change
Expand Up @@ -260,7 +260,7 @@ Basic Usage

.. function:: load(fp, *, cls=None, object_hook=None, parse_float=None, \
parse_int=None, parse_constant=None, \
object_pairs_hook=None, **kw)
object_pairs_hook=None, cache_keys=True, **kw)

Deserialize *fp* to a Python object
using the :ref:`JSON-to-Python conversion table <json-to-py-table>`.
Expand Down Expand Up @@ -321,6 +321,11 @@ Basic Usage
Default ``None``.
:type parse_constant: :term:`callable` | None

:param bool cache_keys:
If true, identical object keys are cached so that repeated keys share a
single string object across the decoded dictionaries. This lowers memory
usage at the cost of slower decoding.
Default ``True``.

:raises JSONDecodeError:
When the data being deserialized is not a valid JSON document.

Expand All @@ -345,7 +350,11 @@ Basic Usage
conversion length limitation <int_max_str_digits>` to help avoid denial
of service attacks.

.. function:: loads(s, *, cls=None, object_hook=None, parse_float=None, parse_int=None, parse_constant=None, object_pairs_hook=None, **kw)
.. versionchanged:: next

* Added the optional *cache_keys* parameter.

.. function:: loads(s, *, cls=None, object_hook=None, parse_float=None, parse_int=None, parse_constant=None, object_pairs_hook=None, cache_keys=True, **kw)

Identical to :func:`load`, but instead of a file-like object,
deserialize *s* (a :class:`str`, :class:`bytes` or :class:`bytearray`
Expand Down
17 changes: 13 additions & 4 deletions Lib/json/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -272,7 +272,8 @@ def detect_encoding(b):


def load(fp, *, cls=None, object_hook=None, parse_float=None,
parse_int=None, parse_constant=None, object_pairs_hook=None, **kw):
parse_int=None, parse_constant=None, object_pairs_hook=None,
cache_keys=True, **kw):
"""Deserialize ``fp`` (a ``.read()``-supporting file-like object containing
a JSON document) to a Python object.

Expand All @@ -293,11 +294,13 @@ def load(fp, *, cls=None, object_hook=None, parse_float=None,
return loads(fp.read(),
cls=cls, object_hook=object_hook,
parse_float=parse_float, parse_int=parse_int,
parse_constant=parse_constant, object_pairs_hook=object_pairs_hook, **kw)
parse_constant=parse_constant, object_pairs_hook=object_pairs_hook,
cache_keys=cache_keys, **kw)


def loads(s, *, cls=None, object_hook=None, parse_float=None,
parse_int=None, parse_constant=None, object_pairs_hook=None, **kw):
parse_int=None, parse_constant=None, object_pairs_hook=None,
cache_keys=True, **kw):
"""Deserialize ``s`` (a ``str``, ``bytes`` or ``bytearray`` instance
containing a JSON document) to a Python object.

Expand Down Expand Up @@ -327,6 +330,9 @@ def loads(s, *, cls=None, object_hook=None, parse_float=None,
This can be used to raise an exception if invalid JSON numbers
are encountered.

If ``cache_keys`` is true (the default), repeated keys are re-used across
dictionaries, which lowers memory usage at the cost of slower decoding.

To use a custom ``JSONDecoder`` subclass, specify it with the ``cls``
kwarg; otherwise ``JSONDecoder`` is used.
"""
Expand All @@ -342,7 +348,8 @@ def loads(s, *, cls=None, object_hook=None, parse_float=None,

if (cls is None and object_hook is None and
parse_int is None and parse_float is None and
parse_constant is None and object_pairs_hook is None and not kw):
parse_constant is None and object_pairs_hook is None and
cache_keys and not kw):
return _default_decoder.decode(s)
if cls is None:
cls = JSONDecoder
Expand All @@ -356,4 +363,6 @@ def loads(s, *, cls=None, object_hook=None, parse_float=None,
kw['parse_int'] = parse_int
if parse_constant is not None:
kw['parse_constant'] = parse_constant
if not cache_keys:
kw['cache_keys'] = cache_keys
Comment on lines +366 to +367
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It seems like there are no tests to check if new parameters aren't passed.

return cls(**kw).decode(s)
11 changes: 8 additions & 3 deletions Lib/json/decoder.py
Original file line number Diff line number Diff line change
Expand Up @@ -135,7 +135,7 @@ def py_scanstring(s, end, strict=True,


def JSONObject(s_and_end, strict, scan_once, object_hook, object_pairs_hook,
memo=None, _w=WHITESPACE.match, _ws=WHITESPACE_STR):
memo=None, cache_keys=True, _w=WHITESPACE.match, _ws=WHITESPACE_STR):
s, end = s_and_end
pairs = []
pairs_append = pairs.append
Expand Down Expand Up @@ -166,7 +166,8 @@ def JSONObject(s_and_end, strict, scan_once, object_hook, object_pairs_hook,
end += 1
while True:
key, end = scanstring(s, end, strict)
key = memo_get(key, key)
if cache_keys:
key = memo_get(key, key)
# To skip some function call overhead we optimize the fast paths where
# the JSON key separator is ": " or just ":".
if s[end:end + 1] != ':':
Expand Down Expand Up @@ -291,7 +292,7 @@ class JSONDecoder(object):

def __init__(self, *, object_hook=None, parse_float=None,
parse_int=None, parse_constant=None, strict=True,
object_pairs_hook=None):
object_pairs_hook=None, cache_keys=True):
"""``object_hook``, if specified, will be called with the result
of every JSON object decoded and its return value will be used in
place of the given ``dict``. This can be used to provide custom
Expand Down Expand Up @@ -323,13 +324,17 @@ def __init__(self, *, object_hook=None, parse_float=None,
characters will be allowed inside strings. Control characters in
this context are those with character codes in the 0-31 range,
including ``'\\t'`` (tab), ``'\\n'``, ``'\\r'`` and ``'\\0'``.

If ``cache_keys`` is true (the default), repeated keys are re-used across
dictionaries, which lowers memory usage at the cost of slower decoding.
"""
self.object_hook = object_hook
self.parse_float = parse_float or float
self.parse_int = parse_int or int
self.parse_constant = parse_constant or _CONSTANTS.__getitem__
self.strict = strict
self.object_pairs_hook = object_pairs_hook
self.cache_keys = cache_keys
self.parse_object = JSONObject
self.parse_array = JSONArray
self.parse_string = scanstring
Expand Down
3 changes: 2 additions & 1 deletion Lib/json/scanner.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ def py_make_scanner(context):
parse_string = context.parse_string
match_number = NUMBER_RE.match
strict = context.strict
cache_keys = context.cache_keys
parse_float = context.parse_float
parse_int = context.parse_int
parse_constant = context.parse_constant
Expand All @@ -35,7 +36,7 @@ def _scan_once(string, idx):
return parse_string(string, idx + 1, strict)
elif nextchar == '{':
return parse_object((string, idx + 1), strict,
_scan_once, object_hook, object_pairs_hook, memo)
_scan_once, object_hook, object_pairs_hook, memo, cache_keys)
elif nextchar == '[':
return parse_array((string, idx + 1), _scan_once)
elif nextchar == 'n' and string[idx:idx + 4] == 'null':
Expand Down
14 changes: 14 additions & 0 deletions Lib/test/test_json/test_decode.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import decimal
from io import StringIO
from collections import OrderedDict
from functools import partial
from test.test_json import PyTest, CTest
from test import support

Expand Down Expand Up @@ -89,6 +90,19 @@ def test_keys_reuse(self):
self.check_keys_reuse(s, decoder.decode)
self.assertFalse(decoder.memo)

def check_no_keys_reuse(self, source, loads):
    """Assert that the first two decoded objects do not share key objects.

    *loads* is called with *source*; items 0 and 1 of its result must each
    contain exactly two keys. After sorting, the keys are compared
    pairwise by identity and must be distinct objects.
    """
    decoded = loads(source)
    first_keys = sorted(decoded[0])
    second_keys = sorted(decoded[1])
    # Unpacking enforces that each object carries exactly two keys.
    key_a, key_b = first_keys
    key_c, key_d = second_keys
    for left, right in ((key_a, key_c), (key_b, key_d)):
        self.assertIsNot(left, right)

def test_no_keys_reuse(self):
    """Check that ``cache_keys=False`` yields distinct key objects.

    Exercised both through ``self.loads`` and through a ``JSONDecoder``
    built with ``cache_keys=False``; the decoder's ``memo`` must be empty
    afterwards.
    """
    document = '[{"a_key": 1, "b_\xe9": 2}, {"a_key": 3, "b_\xe9": 4}]'
    uncached_loads = partial(self.loads, cache_keys=False)
    self.check_no_keys_reuse(document, uncached_loads)
    uncached_decoder = self.json.decoder.JSONDecoder(cache_keys=False)
    self.check_no_keys_reuse(document, uncached_decoder.decode)
    self.assertFalse(uncached_decoder.memo)

def test_extra_data(self):
s = '[1, 2, 3]5'
msg = 'Extra data'
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Allow disabling the internal caching of string keys in :func:`json.load` and :func:`json.loads` via the new *cache_keys* parameter.
37 changes: 28 additions & 9 deletions Modules/_json.c
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
typedef struct _PyScannerObject {
PyObject_HEAD
signed char strict;
signed char cache_keys;
PyObject *object_hook;
PyObject *object_pairs_hook;
PyObject *parse_float;
Expand All @@ -32,6 +33,7 @@ typedef struct _PyScannerObject {

static PyMemberDef scanner_members[] = {
{"strict", Py_T_BOOL, offsetof(PyScannerObject, strict), Py_READONLY, "strict"},
{"cache_keys", Py_T_BOOL, offsetof(PyScannerObject, cache_keys), Py_READONLY, "cache_keys"},
{"object_hook", _Py_T_OBJECT, offsetof(PyScannerObject, object_hook), Py_READONLY, "object_hook"},
{"object_pairs_hook", _Py_T_OBJECT, offsetof(PyScannerObject, object_pairs_hook), Py_READONLY},
{"parse_float", _Py_T_OBJECT, offsetof(PyScannerObject, parse_float), Py_READONLY, "parse_float"},
Expand Down Expand Up @@ -710,10 +712,12 @@ _parse_object_unicode(PyScannerObject *s, PyObject *memo, PyObject *pystr, Py_ss
key = scanstring_unicode(pystr, idx + 1, s->strict, &next_idx);
if (key == NULL)
goto bail;
if (PyDict_SetDefaultRef(memo, key, key, &memokey) < 0) {
goto bail;
if (memo != Py_None) {
if (PyDict_SetDefaultRef(memo, key, key, &memokey) < 0) {
goto bail;
}
Py_SETREF(key, memokey);
}
Py_SETREF(key, memokey);
idx = next_idx;

/* skip whitespace between key and : delimiter, read :, skip whitespace */
Expand Down Expand Up @@ -1124,8 +1128,10 @@ scan_once_unicode(PyScannerObject *s, PyObject *memo, PyObject *pystr, Py_ssize_
}

static PyObject *
scanner_call(PyObject *self, PyObject *args, PyObject *kwds)
scanner_call(PyObject *op, PyObject *args, PyObject *kwds)
{
PyScannerObject *self = PyScannerObject_CAST(op);

/* Python callable interface to scan_once_{str,unicode} */
PyObject *pystr;
PyObject *rval;
Expand All @@ -1142,12 +1148,17 @@ scanner_call(PyObject *self, PyObject *args, PyObject *kwds)
return NULL;
}

PyObject *memo = PyDict_New();
if (memo == NULL) {
return NULL;
PyObject *memo;
if (self->cache_keys) {
memo = PyDict_New();
if (memo == NULL) {
return NULL;
}
}
else {
memo = Py_None;
}
rval = scan_once_unicode(PyScannerObject_CAST(self),
memo, pystr, idx, &next_idx);
rval = scan_once_unicode(self, memo, pystr, idx, &next_idx);
Py_DECREF(memo);
if (rval == NULL)
return NULL;
Expand All @@ -1160,6 +1171,7 @@ scanner_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
PyScannerObject *s;
PyObject *ctx;
PyObject *strict;
PyObject *cache_keys;
static char *kwlist[] = {"context", NULL};

if (!PyArg_ParseTupleAndKeywords(args, kwds, "O:make_scanner", kwlist, &ctx))
Expand All @@ -1178,6 +1190,13 @@ scanner_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
Py_DECREF(strict);
if (s->strict < 0)
goto bail;
cache_keys = PyObject_GetAttrString(ctx, "cache_keys");
if (cache_keys == NULL)
goto bail;
s->cache_keys = PyObject_IsTrue(cache_keys);
Py_DECREF(cache_keys);
if (s->cache_keys < 0)
goto bail;
s->object_hook = PyObject_GetAttrString(ctx, "object_hook");
if (s->object_hook == NULL)
goto bail;
Expand Down
Loading