Skip to content

Commit 8f956aa

Browse files
committed
gh-125196: Use PyUnicodeWriter for JSON encoder
Replace the private _PyUnicodeWriter with the public PyUnicodeWriter.
1 parent 1639d93 commit 8f956aa

File tree

1 file changed

+49
-38
lines changed

1 file changed

+49
-38
lines changed

Modules/_json.c

Lines changed: 49 additions & 38 deletions
Original file line number | Diff line number | Diff line change
@@ -86,11 +86,11 @@ encoder_dealloc(PyObject *self);
8686
static int
8787
encoder_clear(PyEncoderObject *self);
8888
static int
89-
encoder_listencode_list(PyEncoderObject *s, _PyUnicodeWriter *writer, PyObject *seq, PyObject *newline_indent);
89+
encoder_listencode_list(PyEncoderObject *s, PyUnicodeWriter *writer, PyObject *seq, PyObject *newline_indent);
9090
static int
91-
encoder_listencode_obj(PyEncoderObject *s, _PyUnicodeWriter *writer, PyObject *obj, PyObject *newline_indent);
91+
encoder_listencode_obj(PyEncoderObject *s, PyUnicodeWriter *writer, PyObject *obj, PyObject *newline_indent);
9292
static int
93-
encoder_listencode_dict(PyEncoderObject *s, _PyUnicodeWriter *writer, PyObject *dct, PyObject *newline_indent);
93+
encoder_listencode_dict(PyEncoderObject *s, PyUnicodeWriter *writer, PyObject *dct, PyObject *newline_indent);
9494
static PyObject *
9595
_encoded_const(PyObject *obj);
9696
static void
@@ -1268,38 +1268,39 @@ encoder_call(PyEncoderObject *self, PyObject *args, PyObject *kwds)
12681268
{
12691269
/* Python callable interface to encoder_listencode_obj */
12701270
static char *kwlist[] = {"obj", "_current_indent_level", NULL};
1271-
PyObject *obj, *result;
1271+
PyObject *obj;
12721272
Py_ssize_t indent_level;
1273-
_PyUnicodeWriter writer;
12741273

12751274
if (!PyArg_ParseTupleAndKeywords(args, kwds, "On:_iterencode", kwlist,
1276-
&obj, &indent_level))
1275+
&obj, &indent_level))
12771276
return NULL;
12781277

1279-
_PyUnicodeWriter_Init(&writer);
1280-
writer.overallocate = 1;
1278+
PyUnicodeWriter *writer = PyUnicodeWriter_Create(0);
1279+
if (writer == NULL) {
1280+
return NULL;
1281+
}
12811282

12821283
PyObject *newline_indent = NULL;
12831284
if (self->indent != Py_None) {
12841285
newline_indent = _create_newline_indent(self->indent, indent_level);
12851286
if (newline_indent == NULL) {
1286-
_PyUnicodeWriter_Dealloc(&writer);
1287+
PyUnicodeWriter_Discard(writer);
12871288
return NULL;
12881289
}
12891290
}
1290-
if (encoder_listencode_obj(self, &writer, obj, newline_indent)) {
1291-
_PyUnicodeWriter_Dealloc(&writer);
1291+
if (encoder_listencode_obj(self, writer, obj, newline_indent)) {
1292+
PyUnicodeWriter_Discard(writer);
12921293
Py_XDECREF(newline_indent);
12931294
return NULL;
12941295
}
12951296
Py_XDECREF(newline_indent);
12961297

1297-
result = PyTuple_New(1);
1298-
if (result == NULL ||
1299-
PyTuple_SetItem(result, 0, _PyUnicodeWriter_Finish(&writer)) < 0) {
1300-
Py_XDECREF(result);
1298+
PyObject *str = PyUnicodeWriter_Finish(writer);
1299+
if (str == NULL) {
13011300
return NULL;
13021301
}
1302+
PyObject *result = PyTuple_Pack(1, str);
1303+
Py_DECREF(str);
13031304
return result;
13041305
}
13051306

@@ -1370,30 +1371,30 @@ encoder_encode_string(PyEncoderObject *s, PyObject *obj)
13701371
}
13711372

13721373
static int
1373-
_steal_accumulate(_PyUnicodeWriter *writer, PyObject *stolen)
1374+
_steal_accumulate(PyUnicodeWriter *writer, PyObject *stolen)
13741375
{
13751376
/* Append stolen and then decrement its reference count */
1376-
int rval = _PyUnicodeWriter_WriteStr(writer, stolen);
1377+
int rval = PyUnicodeWriter_WriteStr(writer, stolen);
13771378
Py_DECREF(stolen);
13781379
return rval;
13791380
}
13801381

13811382
static int
1382-
encoder_listencode_obj(PyEncoderObject *s, _PyUnicodeWriter *writer,
1383+
encoder_listencode_obj(PyEncoderObject *s, PyUnicodeWriter *writer,
13831384
PyObject *obj, PyObject *newline_indent)
13841385
{
13851386
/* Encode Python object obj to a JSON term */
13861387
PyObject *newobj;
13871388
int rv;
13881389

13891390
if (obj == Py_None) {
1390-
return _PyUnicodeWriter_WriteASCIIString(writer, "null", 4);
1391+
return PyUnicodeWriter_WriteUTF8(writer, "null", 4);
13911392
}
13921393
else if (obj == Py_True) {
1393-
return _PyUnicodeWriter_WriteASCIIString(writer, "true", 4);
1394+
return PyUnicodeWriter_WriteUTF8(writer, "true", 4);
13941395
}
13951396
else if (obj == Py_False) {
1396-
return _PyUnicodeWriter_WriteASCIIString(writer, "false", 5);
1397+
return PyUnicodeWriter_WriteUTF8(writer, "false", 5);
13971398
}
13981399
else if (PyUnicode_Check(obj)) {
13991400
PyObject *encoded = encoder_encode_string(s, obj);
@@ -1402,6 +1403,10 @@ encoder_listencode_obj(PyEncoderObject *s, _PyUnicodeWriter *writer,
14021403
return _steal_accumulate(writer, encoded);
14031404
}
14041405
else if (PyLong_Check(obj)) {
1406+
if (PyLong_CheckExact(obj)) {
1407+
// Fast-path for exact integers
1408+
return PyUnicodeWriter_WriteRepr(writer, obj);
1409+
}
14051410
PyObject *encoded = PyLong_Type.tp_repr(obj);
14061411
if (encoded == NULL)
14071412
return -1;
@@ -1478,7 +1483,7 @@ encoder_listencode_obj(PyEncoderObject *s, _PyUnicodeWriter *writer,
14781483
}
14791484

14801485
static int
1481-
encoder_encode_key_value(PyEncoderObject *s, _PyUnicodeWriter *writer, bool *first,
1486+
encoder_encode_key_value(PyEncoderObject *s, PyUnicodeWriter *writer, bool *first,
14821487
PyObject *dct, PyObject *key, PyObject *value,
14831488
PyObject *newline_indent,
14841489
PyObject *item_separator)
@@ -1518,7 +1523,7 @@ encoder_encode_key_value(PyEncoderObject *s, _PyUnicodeWriter *writer, bool *fir
15181523
*first = false;
15191524
}
15201525
else {
1521-
if (_PyUnicodeWriter_WriteStr(writer, item_separator) < 0) {
1526+
if (PyUnicodeWriter_WriteStr(writer, item_separator) < 0) {
15221527
Py_DECREF(keystr);
15231528
return -1;
15241529
}
@@ -1533,7 +1538,7 @@ encoder_encode_key_value(PyEncoderObject *s, _PyUnicodeWriter *writer, bool *fir
15331538
if (_steal_accumulate(writer, encoded) < 0) {
15341539
return -1;
15351540
}
1536-
if (_PyUnicodeWriter_WriteStr(writer, s->key_separator) < 0) {
1541+
if (PyUnicodeWriter_WriteStr(writer, s->key_separator) < 0) {
15371542
return -1;
15381543
}
15391544
if (encoder_listencode_obj(s, writer, value, newline_indent) < 0) {
@@ -1544,7 +1549,7 @@ encoder_encode_key_value(PyEncoderObject *s, _PyUnicodeWriter *writer, bool *fir
15441549
}
15451550

15461551
static int
1547-
encoder_listencode_dict(PyEncoderObject *s, _PyUnicodeWriter *writer,
1552+
encoder_listencode_dict(PyEncoderObject *s, PyUnicodeWriter *writer,
15481553
PyObject *dct, PyObject *newline_indent)
15491554
{
15501555
/* Encode Python dict dct to a JSON term */
@@ -1555,8 +1560,10 @@ encoder_listencode_dict(PyEncoderObject *s, _PyUnicodeWriter *writer,
15551560
PyObject *new_newline_indent = NULL;
15561561
PyObject *separator_indent = NULL;
15571562

1558-
if (PyDict_GET_SIZE(dct) == 0) /* Fast path */
1559-
return _PyUnicodeWriter_WriteASCIIString(writer, "{}", 2);
1563+
if (PyDict_GET_SIZE(dct) == 0) {
1564+
/* Fast path */
1565+
return PyUnicodeWriter_WriteUTF8(writer, "{}", 2);
1566+
}
15601567

15611568
if (s->markers != Py_None) {
15621569
int has_key;
@@ -1574,8 +1581,9 @@ encoder_listencode_dict(PyEncoderObject *s, _PyUnicodeWriter *writer,
15741581
}
15751582
}
15761583

1577-
if (_PyUnicodeWriter_WriteChar(writer, '{'))
1584+
if (PyUnicodeWriter_WriteChar(writer, '{')) {
15781585
goto bail;
1586+
}
15791587

15801588
PyObject *current_item_separator = s->item_separator; // borrowed reference
15811589
if (s->indent != Py_None) {
@@ -1589,7 +1597,7 @@ encoder_listencode_dict(PyEncoderObject *s, _PyUnicodeWriter *writer,
15891597
}
15901598
// update item separator with a borrowed reference
15911599
current_item_separator = separator_indent;
1592-
if (_PyUnicodeWriter_WriteStr(writer, new_newline_indent) < 0) {
1600+
if (PyUnicodeWriter_WriteStr(writer, new_newline_indent) < 0) {
15931601
goto bail;
15941602
}
15951603
}
@@ -1635,13 +1643,14 @@ encoder_listencode_dict(PyEncoderObject *s, _PyUnicodeWriter *writer,
16351643
Py_CLEAR(new_newline_indent);
16361644
Py_CLEAR(separator_indent);
16371645

1638-
if (_PyUnicodeWriter_WriteStr(writer, newline_indent) < 0) {
1646+
if (PyUnicodeWriter_WriteStr(writer, newline_indent) < 0) {
16391647
goto bail;
16401648
}
16411649
}
16421650

1643-
if (_PyUnicodeWriter_WriteChar(writer, '}'))
1651+
if (PyUnicodeWriter_WriteChar(writer, '}')) {
16441652
goto bail;
1653+
}
16451654
return 0;
16461655

16471656
bail:
@@ -1653,7 +1662,7 @@ encoder_listencode_dict(PyEncoderObject *s, _PyUnicodeWriter *writer,
16531662
}
16541663

16551664
static int
1656-
encoder_listencode_list(PyEncoderObject *s, _PyUnicodeWriter *writer,
1665+
encoder_listencode_list(PyEncoderObject *s, PyUnicodeWriter *writer,
16571666
PyObject *seq, PyObject *newline_indent)
16581667
{
16591668
PyObject *ident = NULL;
@@ -1668,7 +1677,7 @@ encoder_listencode_list(PyEncoderObject *s, _PyUnicodeWriter *writer,
16681677
return -1;
16691678
if (PySequence_Fast_GET_SIZE(s_fast) == 0) {
16701679
Py_DECREF(s_fast);
1671-
return _PyUnicodeWriter_WriteASCIIString(writer, "[]", 2);
1680+
return PyUnicodeWriter_WriteUTF8(writer, "[]", 2);
16721681
}
16731682

16741683
if (s->markers != Py_None) {
@@ -1687,8 +1696,9 @@ encoder_listencode_list(PyEncoderObject *s, _PyUnicodeWriter *writer,
16871696
}
16881697
}
16891698

1690-
if (_PyUnicodeWriter_WriteChar(writer, '['))
1699+
if (PyUnicodeWriter_WriteChar(writer, '[')) {
16911700
goto bail;
1701+
}
16921702

16931703
PyObject *separator = s->item_separator; // borrowed reference
16941704
if (s->indent != Py_None) {
@@ -1697,7 +1707,7 @@ encoder_listencode_list(PyEncoderObject *s, _PyUnicodeWriter *writer,
16971707
goto bail;
16981708
}
16991709

1700-
if (_PyUnicodeWriter_WriteStr(writer, new_newline_indent) < 0) {
1710+
if (PyUnicodeWriter_WriteStr(writer, new_newline_indent) < 0) {
17011711
goto bail;
17021712
}
17031713

@@ -1710,7 +1720,7 @@ encoder_listencode_list(PyEncoderObject *s, _PyUnicodeWriter *writer,
17101720
for (i = 0; i < PySequence_Fast_GET_SIZE(s_fast); i++) {
17111721
PyObject *obj = PySequence_Fast_GET_ITEM(s_fast, i);
17121722
if (i) {
1713-
if (_PyUnicodeWriter_WriteStr(writer, separator) < 0)
1723+
if (PyUnicodeWriter_WriteStr(writer, separator) < 0)
17141724
goto bail;
17151725
}
17161726
if (encoder_listencode_obj(s, writer, obj, new_newline_indent)) {
@@ -1727,13 +1737,14 @@ encoder_listencode_list(PyEncoderObject *s, _PyUnicodeWriter *writer,
17271737
if (s->indent != Py_None) {
17281738
Py_CLEAR(new_newline_indent);
17291739
Py_CLEAR(separator_indent);
1730-
if (_PyUnicodeWriter_WriteStr(writer, newline_indent) < 0) {
1740+
if (PyUnicodeWriter_WriteStr(writer, newline_indent) < 0) {
17311741
goto bail;
17321742
}
17331743
}
17341744

1735-
if (_PyUnicodeWriter_WriteChar(writer, ']'))
1745+
if (PyUnicodeWriter_WriteChar(writer, ']')) {
17361746
goto bail;
1747+
}
17371748
Py_DECREF(s_fast);
17381749
return 0;
17391750

0 commit comments

Comments
 (0)