@@ -51,7 +51,7 @@ typedef struct _PyEncoderObject {
5151 char sort_keys ;
5252 char skipkeys ;
5353 int allow_nan ;
54- PyCFunction fast_encode ;
54+ int ( * fast_encode )( PyUnicodeWriter * , PyObject * ) ;
5555} PyEncoderObject ;
5656
5757#define PyEncoderObject_CAST (op ) ((PyEncoderObject *)(op))
@@ -102,8 +102,10 @@ static PyObject *
102102_encoded_const (PyObject * obj );
103103static void
104104raise_errmsg (const char * msg , PyObject * s , Py_ssize_t end );
105- static PyObject *
106- encoder_encode_string (PyEncoderObject * s , PyObject * obj );
105+ static int
106+ _steal_accumulate (PyUnicodeWriter * writer , PyObject * stolen );
107+ static int
108+ encoder_write_string (PyEncoderObject * s , PyUnicodeWriter * writer , PyObject * obj );
107109static PyObject *
108110encoder_encode_float (PyEncoderObject * s , PyObject * obj );
109111
@@ -146,22 +148,11 @@ ascii_escape_unichar(Py_UCS4 c, unsigned char *output, Py_ssize_t chars)
146148 return chars ;
147149}
148150
149- static PyObject *
150- ascii_escape_unicode ( PyObject * pystr )
151+ static Py_ssize_t
152+ ascii_escape_size ( const void * input , int kind , Py_ssize_t input_chars )
151153{
152- /* Take a PyUnicode pystr and return a new ASCII-only escaped PyUnicode */
153154 Py_ssize_t i ;
154- Py_ssize_t input_chars ;
155155 Py_ssize_t output_size ;
156- Py_ssize_t chars ;
157- PyObject * rval ;
158- const void * input ;
159- Py_UCS1 * output ;
160- int kind ;
161-
162- input_chars = PyUnicode_GET_LENGTH (pystr );
163- input = PyUnicode_DATA (pystr );
164- kind = PyUnicode_KIND (pystr );
165156
166157 /* Compute the output size */
167158 for (i = 0 , output_size = 2 ; i < input_chars ; i ++ ) {
@@ -181,11 +172,22 @@ ascii_escape_unicode(PyObject *pystr)
181172 }
182173 if (output_size > PY_SSIZE_T_MAX - d ) {
183174 PyErr_SetString (PyExc_OverflowError , "string is too long to escape" );
184- return NULL ;
175+ return -1 ;
185176 }
186177 output_size += d ;
187178 }
188179
180+ return output_size ;
181+ }
182+
183+ static PyObject *
184+ ascii_escape_unicode_and_size (const void * input , int kind , Py_ssize_t input_chars , Py_ssize_t output_size )
185+ {
186+ Py_ssize_t i ;
187+ Py_ssize_t chars ;
188+ PyObject * rval ;
189+ Py_UCS1 * output ;
190+
189191 rval = PyUnicode_New (output_size , 127 );
190192 if (rval == NULL ) {
191193 return NULL ;
@@ -210,23 +212,62 @@ ascii_escape_unicode(PyObject *pystr)
210212}
211213
212214static PyObject *
213- escape_unicode (PyObject * pystr )
215+ ascii_escape_unicode (PyObject * pystr )
216+ {
217+ /* Take a PyUnicode pystr and return a new ASCII-only escaped PyUnicode */
218+ Py_ssize_t input_chars = PyUnicode_GET_LENGTH (pystr );
219+ const void * input = PyUnicode_DATA (pystr );
220+ int kind = PyUnicode_KIND (pystr );
221+
222+ Py_ssize_t output_size = ascii_escape_size (input , kind , input_chars );
223+ if (output_size < 0 ) {
224+ return NULL ;
225+ }
226+
227+ return ascii_escape_unicode_and_size (input , kind , input_chars , output_size );
228+ }
229+
230+ static int
231+ write_escaped_ascii (PyUnicodeWriter * writer , PyObject * pystr )
214232{
215- /* Take a PyUnicode pystr and return a new escaped PyUnicode */
216- Py_ssize_t i ;
217233 Py_ssize_t input_chars ;
218- Py_ssize_t output_size ;
219- Py_ssize_t chars ;
220- PyObject * rval ;
221234 const void * input ;
222235 int kind ;
223- Py_UCS4 maxchar ;
224236
225- maxchar = PyUnicode_MAX_CHAR_VALUE (pystr );
226237 input_chars = PyUnicode_GET_LENGTH (pystr );
227238 input = PyUnicode_DATA (pystr );
228239 kind = PyUnicode_KIND (pystr );
229240
241+ Py_ssize_t output_size = ascii_escape_size (input , kind , input_chars );
242+ if (output_size < 0 ) {
243+ return -1 ;
244+ }
245+
246+ if (output_size == input_chars + 2 ) {
247+ /* No need to escape anything */
248+ if (PyUnicodeWriter_WriteChar (writer , '"' ) < 0 ) {
249+ return -1 ;
250+ }
251+ if (PyUnicodeWriter_WriteStr (writer , pystr ) < 0 ) {
252+ return -1 ;
253+ }
254+ return PyUnicodeWriter_WriteChar (writer , '"' );
255+ }
256+
257+ PyObject * rval = ascii_escape_unicode_and_size (input , kind , input_chars , output_size );
258+ if (rval == NULL ) {
259+ return -1 ;
260+ }
261+
262+ return _steal_accumulate (writer , rval );
263+ }
264+
265+ static Py_ssize_t
266+ escape_size (const void * input , int kind , Py_ssize_t input_chars )
267+ {
268+ Py_ssize_t i ;
269+ Py_ssize_t output_size ;
270+
230271 /* Compute the output size */
231272 for (i = 0 , output_size = 2 ; i < input_chars ; i ++ ) {
232273 Py_UCS4 c = PyUnicode_READ (kind , input , i );
@@ -244,11 +285,21 @@ escape_unicode(PyObject *pystr)
244285 }
245286 if (output_size > PY_SSIZE_T_MAX - d ) {
246287 PyErr_SetString (PyExc_OverflowError , "string is too long to escape" );
247- return NULL ;
288+ return -1 ;
248289 }
249290 output_size += d ;
250291 }
251292
293+ return output_size ;
294+ }
295+
296+ static PyObject *
297+ escape_unicode_and_size (const void * input , int kind , Py_UCS4 maxchar , Py_ssize_t input_chars , Py_ssize_t output_size )
298+ {
299+ Py_ssize_t i ;
300+ Py_ssize_t chars ;
301+ PyObject * rval ;
302+
252303 rval = PyUnicode_New (output_size , maxchar );
253304 if (rval == NULL )
254305 return NULL ;
@@ -303,6 +354,55 @@ escape_unicode(PyObject *pystr)
303354 return rval ;
304355}
305356
357+ static PyObject *
358+ escape_unicode (PyObject * pystr )
359+ {
360+ /* Take a PyUnicode pystr and return a new escaped PyUnicode */
361+ Py_ssize_t input_chars = PyUnicode_GET_LENGTH (pystr );
362+ const void * input = PyUnicode_DATA (pystr );
363+ int kind = PyUnicode_KIND (pystr );
364+ Py_UCS4 maxchar = PyUnicode_MAX_CHAR_VALUE (pystr );
365+
366+ Py_ssize_t output_size = escape_size (input , kind , input_chars );
367+ if (output_size < 0 ) {
368+ return NULL ;
369+ }
370+
371+ return escape_unicode_and_size (input , kind , maxchar , input_chars , output_size );
372+ }
373+
374+ static int
375+ write_escaped_unicode (PyUnicodeWriter * writer , PyObject * pystr )
376+ {
377+ Py_ssize_t input_chars = PyUnicode_GET_LENGTH (pystr );
378+ const void * input = PyUnicode_DATA (pystr );
379+ int kind = PyUnicode_KIND (pystr );
380+ Py_UCS4 maxchar = PyUnicode_MAX_CHAR_VALUE (pystr );
381+
382+ Py_ssize_t output_size = escape_size (input , kind , input_chars );
383+ if (output_size < 0 ) {
384+ return -1 ;
385+ }
386+
387+ if (output_size == input_chars + 2 ) {
388+ /* No need to escape anything */
389+ if (PyUnicodeWriter_WriteChar (writer , '"' ) < 0 ) {
390+ return -1 ;
391+ }
392+ if (PyUnicodeWriter_WriteStr (writer , pystr ) < 0 ) {
393+ return -1 ;
394+ }
395+ return PyUnicodeWriter_WriteChar (writer , '"' );
396+ }
397+
398+ PyObject * rval = escape_unicode_and_size (input , kind , maxchar , input_chars , output_size );
399+ if (rval == NULL ) {
400+ return -1 ;
401+ }
402+
403+ return _steal_accumulate (writer , rval );
404+ }
405+
306406static void
307407raise_errmsg (const char * msg , PyObject * s , Py_ssize_t end )
308408{
@@ -1256,8 +1356,11 @@ encoder_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
12561356
12571357 if (PyCFunction_Check (s -> encoder )) {
12581358 PyCFunction f = PyCFunction_GetFunction (s -> encoder );
1259- if (f == py_encode_basestring_ascii || f == py_encode_basestring ) {
1260- s -> fast_encode = f ;
1359+ if (f == py_encode_basestring_ascii ) {
1360+ s -> fast_encode = write_escaped_ascii ;
1361+ }
1362+ else if (f == py_encode_basestring ) {
1363+ s -> fast_encode = write_escaped_unicode ;
12611364 }
12621365 }
12631366
@@ -1438,24 +1541,27 @@ encoder_encode_float(PyEncoderObject *s, PyObject *obj)
14381541 return PyFloat_Type .tp_repr (obj );
14391542}
14401543
1441- static PyObject *
1442- encoder_encode_string (PyEncoderObject * s , PyObject * obj )
1544+ static int
1545+ encoder_write_string (PyEncoderObject * s , PyUnicodeWriter * writer , PyObject * obj )
14431546{
14441547 /* Return the JSON representation of a string */
14451548 PyObject * encoded ;
14461549
14471550 if (s -> fast_encode ) {
1448- return s -> fast_encode (NULL , obj );
1551+ return s -> fast_encode (writer , obj );
14491552 }
14501553 encoded = PyObject_CallOneArg (s -> encoder , obj );
1451- if (encoded != NULL && !PyUnicode_Check (encoded )) {
1554+ if (encoded == NULL ) {
1555+ return -1 ;
1556+ }
1557+ if (!PyUnicode_Check (encoded )) {
14521558 PyErr_Format (PyExc_TypeError ,
14531559 "encoder() must return a string, not %.80s" ,
14541560 Py_TYPE (encoded )-> tp_name );
14551561 Py_DECREF (encoded );
1456- return NULL ;
1562+ return -1 ;
14571563 }
1458- return encoded ;
1564+ return _steal_accumulate ( writer , encoded ) ;
14591565}
14601566
14611567static int
@@ -1486,10 +1592,7 @@ encoder_listencode_obj(PyEncoderObject *s, PyUnicodeWriter *writer,
14861592 return PyUnicodeWriter_WriteASCII (writer , "false" , 5 );
14871593 }
14881594 else if (PyUnicode_Check (obj )) {
1489- PyObject * encoded = encoder_encode_string (s , obj );
1490- if (encoded == NULL )
1491- return -1 ;
1492- return _steal_accumulate (writer , encoded );
1595+ return encoder_write_string (s , writer , obj );
14931596 }
14941597 else if (PyLong_Check (obj )) {
14951598 if (PyLong_CheckExact (obj )) {
@@ -1578,7 +1681,7 @@ encoder_encode_key_value(PyEncoderObject *s, PyUnicodeWriter *writer, bool *firs
15781681 PyObject * item_separator )
15791682{
15801683 PyObject * keystr = NULL ;
1581- PyObject * encoded ;
1684+ int rv ;
15821685
15831686 if (PyUnicode_Check (key )) {
15841687 keystr = Py_NewRef (key );
@@ -1624,13 +1727,10 @@ encoder_encode_key_value(PyEncoderObject *s, PyUnicodeWriter *writer, bool *firs
16241727 }
16251728 }
16261729
1627- encoded = encoder_encode_string ( s , keystr );
1730+ rv = encoder_write_string ( s , writer , keystr );
16281731 Py_DECREF (keystr );
1629- if (encoded == NULL ) {
1630- return -1 ;
1631- }
16321732
1633- if (_steal_accumulate ( writer , encoded ) < 0 ) {
1733+ if (rv < 0 ) {
16341734 return -1 ;
16351735 }
16361736 if (PyUnicodeWriter_WriteStr (writer , s -> key_separator ) < 0 ) {
0 commit comments