@@ -6406,32 +6406,15 @@ _PyUnicode_EncodeUTF16(PyObject *str,
64066406 const char * errors ,
64076407 int byteorder )
64086408{
6409- int kind ;
6410- const void * data ;
6411- Py_ssize_t len ;
6412- PyObject * v ;
6413- unsigned short * out ;
6414- Py_ssize_t pairs ;
6415- #if PY_BIG_ENDIAN
6416- int native_ordering = byteorder >= 0 ;
6417- #else
6418- int native_ordering = byteorder <= 0 ;
6419- #endif
6420- const char * encoding ;
6421- Py_ssize_t nsize , pos ;
6422- PyObject * errorHandler = NULL ;
6423- PyObject * exc = NULL ;
6424- PyObject * rep = NULL ;
6425-
64266409 if (!PyUnicode_Check (str )) {
64276410 PyErr_BadArgument ();
64286411 return NULL ;
64296412 }
6430- kind = PyUnicode_KIND (str );
6431- data = PyUnicode_DATA (str );
6432- len = PyUnicode_GET_LENGTH (str );
6413+ int kind = PyUnicode_KIND (str );
6414+ const void * data = PyUnicode_DATA (str );
6415+ Py_ssize_t len = PyUnicode_GET_LENGTH (str );
64336416
6434- pairs = 0 ;
6417+ Py_ssize_t pairs = 0 ;
64356418 if (kind == PyUnicode_4BYTE_KIND ) {
64366419 const Py_UCS4 * in = (const Py_UCS4 * )data ;
64376420 const Py_UCS4 * end = in + len ;
@@ -6444,27 +6427,48 @@ _PyUnicode_EncodeUTF16(PyObject *str,
64446427 if (len > PY_SSIZE_T_MAX / 2 - pairs - (byteorder == 0 )) {
64456428 return PyErr_NoMemory ();
64466429 }
6447- nsize = len + pairs + (byteorder == 0 );
6448- v = PyBytes_FromStringAndSize (NULL , nsize * 2 );
6449- if (v == NULL ) {
6430+ Py_ssize_t nsize = len + pairs + (byteorder == 0 );
6431+
6432+ #if PY_BIG_ENDIAN
6433+ int native_ordering = byteorder >= 0 ;
6434+ #else
6435+ int native_ordering = byteorder <= 0 ;
6436+ #endif
6437+
6438+ if (kind == PyUnicode_1BYTE_KIND ) {
6439+ PyObject * v = PyBytes_FromStringAndSize (NULL , nsize * 2 );
6440+ if (v == NULL ) {
6441+ return NULL ;
6442+ }
6443+
6444+ /* output buffer is 2-bytes aligned */
6445+ assert (_Py_IS_ALIGNED (PyBytes_AS_STRING (v ), 2 ));
6446+ unsigned short * out = (unsigned short * )PyBytes_AS_STRING (v );
6447+ if (byteorder == 0 ) {
6448+ * out ++ = 0xFEFF ;
6449+ }
6450+ if (len > 0 ) {
6451+ ucs1lib_utf16_encode ((const Py_UCS1 * )data , len , & out , native_ordering );
6452+ }
6453+ return v ;
6454+ }
6455+
6456+ PyBytesWriter * writer = PyBytesWriter_Create (nsize * 2 );
6457+ if (writer == NULL ) {
64506458 return NULL ;
64516459 }
64526460
64536461 /* output buffer is 2-bytes aligned */
6454- assert (_Py_IS_ALIGNED (PyBytes_AS_STRING ( v ), 2 ));
6455- out = ( unsigned short * ) PyBytes_AS_STRING ( v );
6462+ assert (_Py_IS_ALIGNED (PyBytesWriter_GetData ( writer ), 2 ));
6463+ unsigned short * out = PyBytesWriter_GetData ( writer );
64566464 if (byteorder == 0 ) {
64576465 * out ++ = 0xFEFF ;
64586466 }
64596467 if (len == 0 ) {
6460- goto done ;
6461- }
6462-
6463- if (kind == PyUnicode_1BYTE_KIND ) {
6464- ucs1lib_utf16_encode ((const Py_UCS1 * )data , len , & out , native_ordering );
6465- goto done ;
6468+ return PyBytesWriter_Finish (writer );
64666469 }
64676470
6471+ const char * encoding ;
64686472 if (byteorder < 0 ) {
64696473 encoding = "utf-16-le" ;
64706474 }
@@ -6475,10 +6479,11 @@ _PyUnicode_EncodeUTF16(PyObject *str,
64756479 encoding = "utf-16" ;
64766480 }
64776481
6478- pos = 0 ;
6479- while ( pos < len ) {
6480- Py_ssize_t newpos , repsize , moreunits ;
6482+ PyObject * errorHandler = NULL ;
6483+ PyObject * exc = NULL ;
6484+ PyObject * rep = NULL ;
64816485
6486+ for (Py_ssize_t pos = 0 ; pos < len ; ) {
64826487 if (kind == PyUnicode_2BYTE_KIND ) {
64836488 pos += ucs2lib_utf16_encode ((const Py_UCS2 * )data + pos , len - pos ,
64846489 & out , native_ordering );
@@ -6491,13 +6496,15 @@ _PyUnicode_EncodeUTF16(PyObject *str,
64916496 if (pos == len )
64926497 break ;
64936498
6499+ Py_ssize_t newpos ;
64946500 rep = unicode_encode_call_errorhandler (
64956501 errors , & errorHandler ,
64966502 encoding , "surrogates not allowed" ,
64976503 str , & exc , pos , pos + 1 , & newpos );
64986504 if (!rep )
64996505 goto error ;
65006506
6507+ Py_ssize_t repsize , moreunits ;
65016508 if (PyBytes_Check (rep )) {
65026509 repsize = PyBytes_GET_SIZE (rep );
65036510 if (repsize & 1 ) {
@@ -6523,21 +6530,17 @@ _PyUnicode_EncodeUTF16(PyObject *str,
65236530
65246531 /* two bytes are reserved for each surrogate */
65256532 if (moreunits > 0 ) {
6526- Py_ssize_t outpos = out - (unsigned short * ) PyBytes_AS_STRING (v );
6527- if (moreunits >= (PY_SSIZE_T_MAX - PyBytes_GET_SIZE (v )) / 2 ) {
6528- /* integer overflow */
6529- PyErr_NoMemory ();
6533+ out = PyBytesWriter_GrowAndUpdatePointer (writer , 2 * moreunits , out );
6534+ if (out == NULL ) {
65306535 goto error ;
65316536 }
6532- if (_PyBytes_Resize (& v , PyBytes_GET_SIZE (v ) + 2 * moreunits ) < 0 )
6533- goto error ;
6534- out = (unsigned short * ) PyBytes_AS_STRING (v ) + outpos ;
65356537 }
65366538
65376539 if (PyBytes_Check (rep )) {
65386540 memcpy (out , PyBytes_AS_STRING (rep ), repsize );
65396541 out += repsize / 2 ;
6540- } else /* rep is unicode */ {
6542+ } else {
6543+ /* rep is unicode */
65416544 assert (PyUnicode_KIND (rep ) == PyUnicode_1BYTE_KIND );
65426545 ucs1lib_utf16_encode (PyUnicode_1BYTE_DATA (rep ), repsize ,
65436546 & out , native_ordering );
@@ -6546,23 +6549,20 @@ _PyUnicode_EncodeUTF16(PyObject *str,
65466549 Py_CLEAR (rep );
65476550 }
65486551
6552+ Py_XDECREF (errorHandler );
6553+ Py_XDECREF (exc );
6554+
65496555 /* Cut back to the size actually needed. This is necessary when, for example,
65506556 encoding a string containing isolated surrogates and the 'ignore' handler
65516557 is used. */
6552- nsize = (unsigned char * ) out - (unsigned char * ) PyBytes_AS_STRING (v );
6553- if (nsize != PyBytes_GET_SIZE (v ))
6554- _PyBytes_Resize (& v , nsize );
6555- Py_XDECREF (errorHandler );
6556- Py_XDECREF (exc );
6557- done :
6558- return v ;
6558+ return PyBytesWriter_FinishWithPointer (writer , out );
6559+
65596560 error :
65606561 Py_XDECREF (rep );
65616562 Py_XDECREF (errorHandler );
65626563 Py_XDECREF (exc );
6563- Py_XDECREF ( v );
6564+ PyBytesWriter_Discard ( writer );
65646565 return NULL ;
6565- #undef STORECHAR
65666566}
65676567
65686568PyObject *
0 commit comments