-
-
Notifications
You must be signed in to change notification settings - Fork 33.2k
gh-95382: Use cache for indentations in the JSON encoder #118636
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 1 commit
2e915fd
2a7b274
86c7797
a108c34
86586a5
5778e86
21681c5
0f835d9
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change | ||||||
|---|---|---|---|---|---|---|---|---|
|
|
@@ -85,11 +85,11 @@ encoder_dealloc(PyObject *self); | |||||||
| static int | ||||||||
| encoder_clear(PyEncoderObject *self); | ||||||||
| static int | ||||||||
| encoder_listencode_list(PyEncoderObject *s, _PyUnicodeWriter *writer, PyObject *seq, PyObject *newline_indent); | ||||||||
| encoder_listencode_list(PyEncoderObject *s, _PyUnicodeWriter *writer, PyObject *seq, Py_ssize_t indent_level, PyObject *indent_cache); | ||||||||
| static int | ||||||||
| encoder_listencode_obj(PyEncoderObject *s, _PyUnicodeWriter *writer, PyObject *obj, PyObject *newline_indent); | ||||||||
| encoder_listencode_obj(PyEncoderObject *s, _PyUnicodeWriter *writer, PyObject *obj, Py_ssize_t indent_level, PyObject *indent_cache); | ||||||||
| static int | ||||||||
| encoder_listencode_dict(PyEncoderObject *s, _PyUnicodeWriter *writer, PyObject *dct, PyObject *newline_indent); | ||||||||
| encoder_listencode_dict(PyEncoderObject *s, _PyUnicodeWriter *writer, PyObject *dct, Py_ssize_t indent_level, PyObject *indent_cache); | ||||||||
| static PyObject * | ||||||||
| _encoded_const(PyObject *obj); | ||||||||
| static void | ||||||||
|
|
@@ -1252,14 +1252,81 @@ encoder_new(PyTypeObject *type, PyObject *args, PyObject *kwds) | |||||||
| } | ||||||||
|
|
||||||||
| static PyObject * | ||||||||
| _create_newline_indent(PyObject *indent, Py_ssize_t indent_level) | ||||||||
| _create_indent_cache(PyEncoderObject *s, Py_ssize_t indent_level) | ||||||||
| { | ||||||||
| PyObject *newline_indent = PyUnicode_FromOrdinal('\n'); | ||||||||
| if (newline_indent != NULL && indent_level) { | ||||||||
| PyUnicode_AppendAndDel(&newline_indent, | ||||||||
| PySequence_Repeat(indent, indent_level)); | ||||||||
| PySequence_Repeat(s->indent, indent_level)); | ||||||||
| } | ||||||||
| return newline_indent; | ||||||||
| if (newline_indent == NULL) { | ||||||||
| return NULL; | ||||||||
| } | ||||||||
| PyObject *indent_cache = PyList_New(2); | ||||||||
serhiy-storchaka marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||||||||
| if (indent_cache == NULL) { | ||||||||
| Py_XDECREF(newline_indent); | ||||||||
| return NULL; | ||||||||
| } | ||||||||
| PyList_SET_ITEM(indent_cache, 0, newline_indent); | ||||||||
| PyList_SET_ITEM(indent_cache, 1, Py_NewRef(Py_None)); // not used | ||||||||
| return indent_cache; | ||||||||
| } | ||||||||
|
|
||||||||
| static int | ||||||||
| update_newline_indent(PyEncoderObject *s, | ||||||||
| Py_ssize_t indent_level, PyObject *indent_cache) | ||||||||
| { | ||||||||
| assert(indent_level * 2 == PyList_GET_SIZE(indent_cache)); | ||||||||
| assert(indent_level > 0); | ||||||||
| PyObject *newline_indent = PyList_GET_ITEM(indent_cache, (indent_level - 1)*2); | ||||||||
| newline_indent = PyUnicode_Concat(newline_indent, s->indent); | ||||||||
| if (newline_indent == NULL) { | ||||||||
| return -1; | ||||||||
| } | ||||||||
| if (PyList_Append(indent_cache, newline_indent) < 0) { | ||||||||
| Py_DECREF(newline_indent); | ||||||||
| return -1; | ||||||||
| } | ||||||||
| PyObject *separator_indent = PyUnicode_Concat(s->item_separator, newline_indent); | ||||||||
| Py_DECREF(newline_indent); | ||||||||
| if (PyList_Append(indent_cache, separator_indent) < 0) { | ||||||||
| Py_DECREF(separator_indent); | ||||||||
| return -1; | ||||||||
| } | ||||||||
| Py_DECREF(separator_indent); | ||||||||
| return 0; | ||||||||
| } | ||||||||
|
|
||||||||
| static PyObject * | ||||||||
| do_indent(PyEncoderObject *s, _PyUnicodeWriter *writer, | ||||||||
| Py_ssize_t indent_level, PyObject *indent_cache) | ||||||||
| { | ||||||||
| assert(indent_level > 0); | ||||||||
| assert(s->indent != Py_None); | ||||||||
| PyObject *newline_indent; | ||||||||
| if (indent_level * 2 == PyList_GET_SIZE(indent_cache)) { | ||||||||
| if (update_newline_indent(s, indent_level, indent_cache) < 0) { | ||||||||
| return NULL; | ||||||||
| } | ||||||||
| } | ||||||||
| assert(indent_level * 2 <= PyList_GET_SIZE(indent_cache) - 2); | ||||||||
|
|
||||||||
| newline_indent = PyList_GET_ITEM(indent_cache, indent_level * 2); | ||||||||
| if (_PyUnicodeWriter_WriteStr(writer, newline_indent) < 0) { | ||||||||
| return NULL; | ||||||||
| } | ||||||||
| return PyList_GET_ITEM(indent_cache, indent_level * 2 + 1); | ||||||||
| } | ||||||||
|
|
||||||||
| static int | ||||||||
| do_dedent(PyEncoderObject *s, _PyUnicodeWriter *writer, | ||||||||
| Py_ssize_t indent_level, PyObject *indent_cache) | ||||||||
| { | ||||||||
| assert(indent_level >= 0); | ||||||||
| assert(indent_level * 2 <= PyList_GET_SIZE(indent_cache) - 4); | ||||||||
| assert(s->indent != Py_None); | ||||||||
| PyObject *newline_indent = PyList_GET_ITEM(indent_cache, indent_level * 2); | ||||||||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The assert above is placed after conditional resizing the cache. It is useful to check that we always get the right cache size, independently from conditions. But here we do not change the cache. |
||||||||
| return _PyUnicodeWriter_WriteStr(writer, newline_indent); | ||||||||
| } | ||||||||
|
|
||||||||
| static PyObject * | ||||||||
|
|
@@ -1278,20 +1345,20 @@ encoder_call(PyEncoderObject *self, PyObject *args, PyObject *kwds) | |||||||
| _PyUnicodeWriter_Init(&writer); | ||||||||
| writer.overallocate = 1; | ||||||||
|
|
||||||||
| PyObject *newline_indent = NULL; | ||||||||
| PyObject *indent_cache = NULL; | ||||||||
| if (self->indent != Py_None) { | ||||||||
| newline_indent = _create_newline_indent(self->indent, indent_level); | ||||||||
| if (newline_indent == NULL) { | ||||||||
| indent_cache = _create_indent_cache(self, indent_level); | ||||||||
| if (indent_cache == NULL) { | ||||||||
| _PyUnicodeWriter_Dealloc(&writer); | ||||||||
| return NULL; | ||||||||
| } | ||||||||
| } | ||||||||
| if (encoder_listencode_obj(self, &writer, obj, newline_indent)) { | ||||||||
| if (encoder_listencode_obj(self, &writer, obj, 0, indent_cache)) { | ||||||||
| _PyUnicodeWriter_Dealloc(&writer); | ||||||||
| Py_XDECREF(newline_indent); | ||||||||
| Py_XDECREF(indent_cache); | ||||||||
| return NULL; | ||||||||
| } | ||||||||
| Py_XDECREF(newline_indent); | ||||||||
| Py_XDECREF(indent_cache); | ||||||||
|
|
||||||||
| result = PyTuple_New(1); | ||||||||
| if (result == NULL || | ||||||||
|
|
@@ -1379,7 +1446,8 @@ _steal_accumulate(_PyUnicodeWriter *writer, PyObject *stolen) | |||||||
|
|
||||||||
| static int | ||||||||
| encoder_listencode_obj(PyEncoderObject *s, _PyUnicodeWriter *writer, | ||||||||
| PyObject *obj, PyObject *newline_indent) | ||||||||
| PyObject *obj, | ||||||||
| Py_ssize_t indent_level, PyObject *indent_cache) | ||||||||
| { | ||||||||
| /* Encode Python object obj to a JSON term */ | ||||||||
| PyObject *newobj; | ||||||||
|
|
@@ -1415,14 +1483,14 @@ encoder_listencode_obj(PyEncoderObject *s, _PyUnicodeWriter *writer, | |||||||
| else if (PyList_Check(obj) || PyTuple_Check(obj)) { | ||||||||
| if (_Py_EnterRecursiveCall(" while encoding a JSON object")) | ||||||||
| return -1; | ||||||||
| rv = encoder_listencode_list(s, writer, obj, newline_indent); | ||||||||
| rv = encoder_listencode_list(s, writer, obj, indent_level, indent_cache); | ||||||||
| _Py_LeaveRecursiveCall(); | ||||||||
| return rv; | ||||||||
| } | ||||||||
| else if (PyDict_Check(obj)) { | ||||||||
| if (_Py_EnterRecursiveCall(" while encoding a JSON object")) | ||||||||
| return -1; | ||||||||
| rv = encoder_listencode_dict(s, writer, obj, newline_indent); | ||||||||
| rv = encoder_listencode_dict(s, writer, obj, indent_level, indent_cache); | ||||||||
| _Py_LeaveRecursiveCall(); | ||||||||
| return rv; | ||||||||
| } | ||||||||
|
|
@@ -1456,7 +1524,7 @@ encoder_listencode_obj(PyEncoderObject *s, _PyUnicodeWriter *writer, | |||||||
| Py_XDECREF(ident); | ||||||||
| return -1; | ||||||||
| } | ||||||||
| rv = encoder_listencode_obj(s, writer, newobj, newline_indent); | ||||||||
| rv = encoder_listencode_obj(s, writer, newobj, indent_level, indent_cache); | ||||||||
| _Py_LeaveRecursiveCall(); | ||||||||
|
|
||||||||
| Py_DECREF(newobj); | ||||||||
|
|
@@ -1478,7 +1546,7 @@ encoder_listencode_obj(PyEncoderObject *s, _PyUnicodeWriter *writer, | |||||||
| static int | ||||||||
| encoder_encode_key_value(PyEncoderObject *s, _PyUnicodeWriter *writer, bool *first, | ||||||||
| PyObject *key, PyObject *value, | ||||||||
| PyObject *newline_indent, | ||||||||
| Py_ssize_t indent_level, PyObject *indent_cache, | ||||||||
| PyObject *item_separator) | ||||||||
| { | ||||||||
| PyObject *keystr = NULL; | ||||||||
|
|
@@ -1534,23 +1602,22 @@ encoder_encode_key_value(PyEncoderObject *s, _PyUnicodeWriter *writer, bool *fir | |||||||
| if (_PyUnicodeWriter_WriteStr(writer, s->key_separator) < 0) { | ||||||||
| return -1; | ||||||||
| } | ||||||||
| if (encoder_listencode_obj(s, writer, value, newline_indent) < 0) { | ||||||||
| if (encoder_listencode_obj(s, writer, value, indent_level, indent_cache) < 0) { | ||||||||
| return -1; | ||||||||
| } | ||||||||
| return 0; | ||||||||
| } | ||||||||
|
|
||||||||
| static int | ||||||||
| encoder_listencode_dict(PyEncoderObject *s, _PyUnicodeWriter *writer, | ||||||||
| PyObject *dct, PyObject *newline_indent) | ||||||||
| PyObject *dct, | ||||||||
| Py_ssize_t indent_level, PyObject *indent_cache) | ||||||||
| { | ||||||||
| /* Encode Python dict dct a JSON term */ | ||||||||
| PyObject *ident = NULL; | ||||||||
| PyObject *items = NULL; | ||||||||
| PyObject *key, *value; | ||||||||
| bool first = true; | ||||||||
| PyObject *new_newline_indent = NULL; | ||||||||
| PyObject *separator_indent = NULL; | ||||||||
|
|
||||||||
| if (PyDict_GET_SIZE(dct) == 0) /* Fast path */ | ||||||||
| return _PyUnicodeWriter_WriteASCIIString(writer, "{}", 2); | ||||||||
|
|
@@ -1574,19 +1641,11 @@ encoder_listencode_dict(PyEncoderObject *s, _PyUnicodeWriter *writer, | |||||||
| if (_PyUnicodeWriter_WriteChar(writer, '{')) | ||||||||
| goto bail; | ||||||||
|
|
||||||||
| PyObject *current_item_separator = s->item_separator; // borrowed reference | ||||||||
| PyObject *separator = s->item_separator; // borrowed reference | ||||||||
| if (s->indent != Py_None) { | ||||||||
| new_newline_indent = PyUnicode_Concat(newline_indent, s->indent); | ||||||||
| if (new_newline_indent == NULL) { | ||||||||
| goto bail; | ||||||||
| } | ||||||||
| separator_indent = PyUnicode_Concat(current_item_separator, new_newline_indent); | ||||||||
| if (separator_indent == NULL) { | ||||||||
| goto bail; | ||||||||
| } | ||||||||
| // update item separator with a borrowed reference | ||||||||
| current_item_separator = separator_indent; | ||||||||
| if (_PyUnicodeWriter_WriteStr(writer, new_newline_indent) < 0) { | ||||||||
| indent_level++; | ||||||||
| separator = do_indent(s, writer, indent_level, indent_cache); | ||||||||
| if (separator == NULL) { | ||||||||
| goto bail; | ||||||||
| } | ||||||||
| } | ||||||||
|
|
@@ -1607,8 +1666,8 @@ encoder_listencode_dict(PyEncoderObject *s, _PyUnicodeWriter *writer, | |||||||
| key = PyTuple_GET_ITEM(item, 0); | ||||||||
| value = PyTuple_GET_ITEM(item, 1); | ||||||||
| if (encoder_encode_key_value(s, writer, &first, key, value, | ||||||||
| new_newline_indent, | ||||||||
| current_item_separator) < 0) | ||||||||
| indent_level, indent_cache, | ||||||||
| separator) < 0) | ||||||||
| goto bail; | ||||||||
| } | ||||||||
| Py_CLEAR(items); | ||||||||
|
|
@@ -1617,8 +1676,8 @@ encoder_listencode_dict(PyEncoderObject *s, _PyUnicodeWriter *writer, | |||||||
| Py_ssize_t pos = 0; | ||||||||
| while (PyDict_Next(dct, &pos, &key, &value)) { | ||||||||
| if (encoder_encode_key_value(s, writer, &first, key, value, | ||||||||
| new_newline_indent, | ||||||||
| current_item_separator) < 0) | ||||||||
| indent_level, indent_cache, | ||||||||
| separator) < 0) | ||||||||
| goto bail; | ||||||||
| } | ||||||||
| } | ||||||||
|
|
@@ -1629,12 +1688,8 @@ encoder_listencode_dict(PyEncoderObject *s, _PyUnicodeWriter *writer, | |||||||
| Py_CLEAR(ident); | ||||||||
| } | ||||||||
| if (s->indent != Py_None) { | ||||||||
| Py_CLEAR(new_newline_indent); | ||||||||
| Py_CLEAR(separator_indent); | ||||||||
|
|
||||||||
| if (_PyUnicodeWriter_WriteStr(writer, newline_indent) < 0) { | ||||||||
| goto bail; | ||||||||
| } | ||||||||
| indent_level--; | ||||||||
| do_dedent(s, writer, indent_level, indent_cache); | ||||||||
serhiy-storchaka marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||||||||
| } | ||||||||
|
|
||||||||
| if (_PyUnicodeWriter_WriteChar(writer, '}')) | ||||||||
|
|
@@ -1644,20 +1699,17 @@ encoder_listencode_dict(PyEncoderObject *s, _PyUnicodeWriter *writer, | |||||||
| bail: | ||||||||
| Py_XDECREF(items); | ||||||||
| Py_XDECREF(ident); | ||||||||
| Py_XDECREF(separator_indent); | ||||||||
| Py_XDECREF(new_newline_indent); | ||||||||
| return -1; | ||||||||
| } | ||||||||
|
|
||||||||
| static int | ||||||||
| encoder_listencode_list(PyEncoderObject *s, _PyUnicodeWriter *writer, | ||||||||
| PyObject *seq, PyObject *newline_indent) | ||||||||
| PyObject *seq, | ||||||||
| Py_ssize_t indent_level, PyObject *indent_cache) | ||||||||
| { | ||||||||
| PyObject *ident = NULL; | ||||||||
| PyObject *s_fast = NULL; | ||||||||
| Py_ssize_t i; | ||||||||
| PyObject *new_newline_indent = NULL; | ||||||||
| PyObject *separator_indent = NULL; | ||||||||
|
|
||||||||
| ident = NULL; | ||||||||
| s_fast = PySequence_Fast(seq, "_iterencode_list needs a sequence"); | ||||||||
|
|
@@ -1689,28 +1741,19 @@ encoder_listencode_list(PyEncoderObject *s, _PyUnicodeWriter *writer, | |||||||
|
|
||||||||
| PyObject *separator = s->item_separator; // borrowed reference | ||||||||
| if (s->indent != Py_None) { | ||||||||
| new_newline_indent = PyUnicode_Concat(newline_indent, s->indent); | ||||||||
| if (new_newline_indent == NULL) { | ||||||||
| goto bail; | ||||||||
| } | ||||||||
|
|
||||||||
| if (_PyUnicodeWriter_WriteStr(writer, new_newline_indent) < 0) { | ||||||||
| indent_level++; | ||||||||
| separator = do_indent(s, writer, indent_level, indent_cache); | ||||||||
| if (separator == NULL) { | ||||||||
| goto bail; | ||||||||
| } | ||||||||
|
|
||||||||
| separator_indent = PyUnicode_Concat(separator, new_newline_indent); | ||||||||
| if (separator_indent == NULL) { | ||||||||
| goto bail; | ||||||||
| } | ||||||||
| separator = separator_indent; // assign separator with borrowed reference | ||||||||
| } | ||||||||
| for (i = 0; i < PySequence_Fast_GET_SIZE(s_fast); i++) { | ||||||||
| PyObject *obj = PySequence_Fast_GET_ITEM(s_fast, i); | ||||||||
| if (i) { | ||||||||
| if (_PyUnicodeWriter_WriteStr(writer, separator) < 0) | ||||||||
| goto bail; | ||||||||
| } | ||||||||
| if (encoder_listencode_obj(s, writer, obj, new_newline_indent)) | ||||||||
| if (encoder_listencode_obj(s, writer, obj, indent_level, indent_cache)) | ||||||||
| goto bail; | ||||||||
| } | ||||||||
| if (ident != NULL) { | ||||||||
|
|
@@ -1720,11 +1763,8 @@ encoder_listencode_list(PyEncoderObject *s, _PyUnicodeWriter *writer, | |||||||
| } | ||||||||
|
|
||||||||
| if (s->indent != Py_None) { | ||||||||
| Py_CLEAR(new_newline_indent); | ||||||||
| Py_CLEAR(separator_indent); | ||||||||
| if (_PyUnicodeWriter_WriteStr(writer, newline_indent) < 0) { | ||||||||
| goto bail; | ||||||||
| } | ||||||||
| indent_level--; | ||||||||
| do_dedent(s, writer, indent_level, indent_cache); | ||||||||
serhiy-storchaka marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||||||||
| } | ||||||||
|
|
||||||||
| if (_PyUnicodeWriter_WriteChar(writer, ']')) | ||||||||
|
|
@@ -1735,8 +1775,6 @@ encoder_listencode_list(PyEncoderObject *s, _PyUnicodeWriter *writer, | |||||||
| bail: | ||||||||
| Py_XDECREF(ident); | ||||||||
| Py_DECREF(s_fast); | ||||||||
| Py_XDECREF(separator_indent); | ||||||||
| Py_XDECREF(new_newline_indent); | ||||||||
| return -1; | ||||||||
| } | ||||||||
|
|
||||||||
|
|
||||||||
Uh oh!
There was an error while loading. Please reload this page.