@@ -51,7 +51,7 @@ typedef struct _PyEncoderObject {
51
51
char sort_keys ;
52
52
char skipkeys ;
53
53
int allow_nan ;
54
- PyCFunction fast_encode ;
54
+ int ( * fast_encode )( PyUnicodeWriter * , PyObject * ) ;
55
55
} PyEncoderObject ;
56
56
57
57
#define PyEncoderObject_CAST (op ) ((PyEncoderObject *)(op))
@@ -102,8 +102,10 @@ static PyObject *
102
102
_encoded_const (PyObject * obj );
103
103
static void
104
104
raise_errmsg (const char * msg , PyObject * s , Py_ssize_t end );
105
- static PyObject *
106
- encoder_encode_string (PyEncoderObject * s , PyObject * obj );
105
+ static int
106
+ _steal_accumulate (PyUnicodeWriter * writer , PyObject * stolen );
107
+ static int
108
+ encoder_write_string (PyEncoderObject * s , PyUnicodeWriter * writer , PyObject * obj );
107
109
static PyObject *
108
110
encoder_encode_float (PyEncoderObject * s , PyObject * obj );
109
111
@@ -146,22 +148,11 @@ ascii_escape_unichar(Py_UCS4 c, unsigned char *output, Py_ssize_t chars)
146
148
return chars ;
147
149
}
148
150
149
- static PyObject *
150
- ascii_escape_unicode ( PyObject * pystr )
151
+ static Py_ssize_t
152
+ ascii_escape_size ( const void * input , int kind , Py_ssize_t input_chars )
151
153
{
152
- /* Take a PyUnicode pystr and return a new ASCII-only escaped PyUnicode */
153
154
Py_ssize_t i ;
154
- Py_ssize_t input_chars ;
155
155
Py_ssize_t output_size ;
156
- Py_ssize_t chars ;
157
- PyObject * rval ;
158
- const void * input ;
159
- Py_UCS1 * output ;
160
- int kind ;
161
-
162
- input_chars = PyUnicode_GET_LENGTH (pystr );
163
- input = PyUnicode_DATA (pystr );
164
- kind = PyUnicode_KIND (pystr );
165
156
166
157
/* Compute the output size */
167
158
for (i = 0 , output_size = 2 ; i < input_chars ; i ++ ) {
@@ -181,11 +172,22 @@ ascii_escape_unicode(PyObject *pystr)
181
172
}
182
173
if (output_size > PY_SSIZE_T_MAX - d ) {
183
174
PyErr_SetString (PyExc_OverflowError , "string is too long to escape" );
184
- return NULL ;
175
+ return -1 ;
185
176
}
186
177
output_size += d ;
187
178
}
188
179
180
+ return output_size ;
181
+ }
182
+
183
+ static PyObject *
184
+ ascii_escape_unicode_and_size (const void * input , int kind , Py_ssize_t input_chars , Py_ssize_t output_size )
185
+ {
186
+ Py_ssize_t i ;
187
+ Py_ssize_t chars ;
188
+ PyObject * rval ;
189
+ Py_UCS1 * output ;
190
+
189
191
rval = PyUnicode_New (output_size , 127 );
190
192
if (rval == NULL ) {
191
193
return NULL ;
@@ -210,23 +212,62 @@ ascii_escape_unicode(PyObject *pystr)
210
212
}
211
213
212
214
static PyObject *
ascii_escape_unicode(PyObject *pystr)
{
    /* Build and return a new ASCII-only, escaped PyUnicode from pystr.

       The work is split in two steps: ascii_escape_size() measures the
       escaped length (it returns -1 after raising OverflowError when the
       result would be too long), then ascii_escape_unicode_and_size()
       allocates a string of exactly that length and fills it in.

       Returns NULL if either step fails. */
    const int width = PyUnicode_KIND(pystr);
    const void *data = PyUnicode_DATA(pystr);
    const Py_ssize_t length = PyUnicode_GET_LENGTH(pystr);

    const Py_ssize_t escaped_len = ascii_escape_size(data, width, length);
    if (escaped_len >= 0) {
        return ascii_escape_unicode_and_size(data, width, length, escaped_len);
    }
    return NULL;
}
229
+
230
+ static int
231
+ write_escaped_ascii (PyUnicodeWriter * writer , PyObject * pystr )
214
232
{
215
- /* Take a PyUnicode pystr and return a new escaped PyUnicode */
216
- Py_ssize_t i ;
217
233
Py_ssize_t input_chars ;
218
- Py_ssize_t output_size ;
219
- Py_ssize_t chars ;
220
- PyObject * rval ;
221
234
const void * input ;
222
235
int kind ;
223
- Py_UCS4 maxchar ;
224
236
225
- maxchar = PyUnicode_MAX_CHAR_VALUE (pystr );
226
237
input_chars = PyUnicode_GET_LENGTH (pystr );
227
238
input = PyUnicode_DATA (pystr );
228
239
kind = PyUnicode_KIND (pystr );
229
240
241
+ Py_ssize_t output_size = ascii_escape_size (input , kind , input_chars );
242
+ if (output_size < 0 ) {
243
+ return -1 ;
244
+ }
245
+
246
+ if (output_size == input_chars + 2 ) {
247
+ /* No need to escape anything */
248
+ if (PyUnicodeWriter_WriteChar (writer , '"' ) < 0 ) {
249
+ return -1 ;
250
+ }
251
+ if (PyUnicodeWriter_WriteStr (writer , pystr ) < 0 ) {
252
+ return -1 ;
253
+ }
254
+ return PyUnicodeWriter_WriteChar (writer , '"' );
255
+ }
256
+
257
+ PyObject * rval = ascii_escape_unicode_and_size (input , kind , input_chars , output_size );
258
+ if (rval == NULL ) {
259
+ return -1 ;
260
+ }
261
+
262
+ return _steal_accumulate (writer , rval );
263
+ }
264
+
265
+ static Py_ssize_t
266
+ escape_size (const void * input , int kind , Py_ssize_t input_chars )
267
+ {
268
+ Py_ssize_t i ;
269
+ Py_ssize_t output_size ;
270
+
230
271
/* Compute the output size */
231
272
for (i = 0 , output_size = 2 ; i < input_chars ; i ++ ) {
232
273
Py_UCS4 c = PyUnicode_READ (kind , input , i );
@@ -244,11 +285,21 @@ escape_unicode(PyObject *pystr)
244
285
}
245
286
if (output_size > PY_SSIZE_T_MAX - d ) {
246
287
PyErr_SetString (PyExc_OverflowError , "string is too long to escape" );
247
- return NULL ;
288
+ return -1 ;
248
289
}
249
290
output_size += d ;
250
291
}
251
292
293
+ return output_size ;
294
+ }
295
+
296
+ static PyObject *
297
+ escape_unicode_and_size (const void * input , int kind , Py_UCS4 maxchar , Py_ssize_t input_chars , Py_ssize_t output_size )
298
+ {
299
+ Py_ssize_t i ;
300
+ Py_ssize_t chars ;
301
+ PyObject * rval ;
302
+
252
303
rval = PyUnicode_New (output_size , maxchar );
253
304
if (rval == NULL )
254
305
return NULL ;
@@ -303,6 +354,55 @@ escape_unicode(PyObject *pystr)
303
354
return rval ;
304
355
}
305
356
357
+ static PyObject *
358
+ escape_unicode (PyObject * pystr )
359
+ {
360
+ /* Take a PyUnicode pystr and return a new escaped PyUnicode */
361
+ Py_ssize_t input_chars = PyUnicode_GET_LENGTH (pystr );
362
+ const void * input = PyUnicode_DATA (pystr );
363
+ int kind = PyUnicode_KIND (pystr );
364
+ Py_UCS4 maxchar = PyUnicode_MAX_CHAR_VALUE (pystr );
365
+
366
+ Py_ssize_t output_size = escape_size (input , kind , input_chars );
367
+ if (output_size < 0 ) {
368
+ return NULL ;
369
+ }
370
+
371
+ return escape_unicode_and_size (input , kind , maxchar , input_chars , output_size );
372
+ }
373
+
374
+ static int
375
+ write_escaped_unicode (PyUnicodeWriter * writer , PyObject * pystr )
376
+ {
377
+ Py_ssize_t input_chars = PyUnicode_GET_LENGTH (pystr );
378
+ const void * input = PyUnicode_DATA (pystr );
379
+ int kind = PyUnicode_KIND (pystr );
380
+ Py_UCS4 maxchar = PyUnicode_MAX_CHAR_VALUE (pystr );
381
+
382
+ Py_ssize_t output_size = escape_size (input , kind , input_chars );
383
+ if (output_size < 0 ) {
384
+ return -1 ;
385
+ }
386
+
387
+ if (output_size == input_chars + 2 ) {
388
+ /* No need to escape anything */
389
+ if (PyUnicodeWriter_WriteChar (writer , '"' ) < 0 ) {
390
+ return -1 ;
391
+ }
392
+ if (PyUnicodeWriter_WriteStr (writer , pystr ) < 0 ) {
393
+ return -1 ;
394
+ }
395
+ return PyUnicodeWriter_WriteChar (writer , '"' );
396
+ }
397
+
398
+ PyObject * rval = escape_unicode_and_size (input , kind , maxchar , input_chars , output_size );
399
+ if (rval == NULL ) {
400
+ return -1 ;
401
+ }
402
+
403
+ return _steal_accumulate (writer , rval );
404
+ }
405
+
306
406
static void
307
407
raise_errmsg (const char * msg , PyObject * s , Py_ssize_t end )
308
408
{
@@ -1256,8 +1356,11 @@ encoder_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
1256
1356
1257
1357
if (PyCFunction_Check (s -> encoder )) {
1258
1358
PyCFunction f = PyCFunction_GetFunction (s -> encoder );
1259
- if (f == py_encode_basestring_ascii || f == py_encode_basestring ) {
1260
- s -> fast_encode = f ;
1359
+ if (f == py_encode_basestring_ascii ) {
1360
+ s -> fast_encode = write_escaped_ascii ;
1361
+ }
1362
+ else if (f == py_encode_basestring ) {
1363
+ s -> fast_encode = write_escaped_unicode ;
1261
1364
}
1262
1365
}
1263
1366
@@ -1438,24 +1541,27 @@ encoder_encode_float(PyEncoderObject *s, PyObject *obj)
1438
1541
return PyFloat_Type .tp_repr (obj );
1439
1542
}
1440
1543
1441
- static PyObject *
1442
- encoder_encode_string (PyEncoderObject * s , PyObject * obj )
1544
+ static int
1545
+ encoder_write_string (PyEncoderObject * s , PyUnicodeWriter * writer , PyObject * obj )
1443
1546
{
1444
1547
/* Return the JSON representation of a string */
1445
1548
PyObject * encoded ;
1446
1549
1447
1550
if (s -> fast_encode ) {
1448
- return s -> fast_encode (NULL , obj );
1551
+ return s -> fast_encode (writer , obj );
1449
1552
}
1450
1553
encoded = PyObject_CallOneArg (s -> encoder , obj );
1451
- if (encoded != NULL && !PyUnicode_Check (encoded )) {
1554
+ if (encoded == NULL ) {
1555
+ return -1 ;
1556
+ }
1557
+ if (!PyUnicode_Check (encoded )) {
1452
1558
PyErr_Format (PyExc_TypeError ,
1453
1559
"encoder() must return a string, not %.80s" ,
1454
1560
Py_TYPE (encoded )-> tp_name );
1455
1561
Py_DECREF (encoded );
1456
- return NULL ;
1562
+ return -1 ;
1457
1563
}
1458
- return encoded ;
1564
+ return _steal_accumulate ( writer , encoded ) ;
1459
1565
}
1460
1566
1461
1567
static int
@@ -1486,10 +1592,7 @@ encoder_listencode_obj(PyEncoderObject *s, PyUnicodeWriter *writer,
1486
1592
return PyUnicodeWriter_WriteASCII (writer , "false" , 5 );
1487
1593
}
1488
1594
else if (PyUnicode_Check (obj )) {
1489
- PyObject * encoded = encoder_encode_string (s , obj );
1490
- if (encoded == NULL )
1491
- return -1 ;
1492
- return _steal_accumulate (writer , encoded );
1595
+ return encoder_write_string (s , writer , obj );
1493
1596
}
1494
1597
else if (PyLong_Check (obj )) {
1495
1598
if (PyLong_CheckExact (obj )) {
@@ -1578,7 +1681,7 @@ encoder_encode_key_value(PyEncoderObject *s, PyUnicodeWriter *writer, bool *firs
1578
1681
PyObject * item_separator )
1579
1682
{
1580
1683
PyObject * keystr = NULL ;
1581
- PyObject * encoded ;
1684
+ int rv ;
1582
1685
1583
1686
if (PyUnicode_Check (key )) {
1584
1687
keystr = Py_NewRef (key );
@@ -1624,13 +1727,10 @@ encoder_encode_key_value(PyEncoderObject *s, PyUnicodeWriter *writer, bool *firs
1624
1727
}
1625
1728
}
1626
1729
1627
- encoded = encoder_encode_string ( s , keystr );
1730
+ rv = encoder_write_string ( s , writer , keystr );
1628
1731
Py_DECREF (keystr );
1629
- if (encoded == NULL ) {
1630
- return -1 ;
1631
- }
1632
1732
1633
- if (_steal_accumulate ( writer , encoded ) < 0 ) {
1733
+ if (rv < 0 ) {
1634
1734
return -1 ;
1635
1735
}
1636
1736
if (PyUnicodeWriter_WriteStr (writer , s -> key_separator ) < 0 ) {
0 commit comments