@@ -6407,32 +6407,15 @@ _PyUnicode_EncodeUTF16(PyObject *str,
64076407 const char * errors ,
64086408 int byteorder )
64096409{
6410- int kind ;
6411- const void * data ;
6412- Py_ssize_t len ;
6413- PyObject * v ;
6414- unsigned short * out ;
6415- Py_ssize_t pairs ;
6416- #if PY_BIG_ENDIAN
6417- int native_ordering = byteorder >= 0 ;
6418- #else
6419- int native_ordering = byteorder <= 0 ;
6420- #endif
6421- const char * encoding ;
6422- Py_ssize_t nsize , pos ;
6423- PyObject * errorHandler = NULL ;
6424- PyObject * exc = NULL ;
6425- PyObject * rep = NULL ;
6426-
64276410 if (!PyUnicode_Check (str )) {
64286411 PyErr_BadArgument ();
64296412 return NULL ;
64306413 }
6431- kind = PyUnicode_KIND (str );
6432- data = PyUnicode_DATA (str );
6433- len = PyUnicode_GET_LENGTH (str );
6414+ int kind = PyUnicode_KIND (str );
6415+ const void * data = PyUnicode_DATA (str );
6416+ Py_ssize_t len = PyUnicode_GET_LENGTH (str );
64346417
6435- pairs = 0 ;
6418+ Py_ssize_t pairs = 0 ;
64366419 if (kind == PyUnicode_4BYTE_KIND ) {
64376420 const Py_UCS4 * in = (const Py_UCS4 * )data ;
64386421 const Py_UCS4 * end = in + len ;
@@ -6445,27 +6428,48 @@ _PyUnicode_EncodeUTF16(PyObject *str,
64456428 if (len > PY_SSIZE_T_MAX / 2 - pairs - (byteorder == 0 )) {
64466429 return PyErr_NoMemory ();
64476430 }
6448- nsize = len + pairs + (byteorder == 0 );
6449- v = PyBytes_FromStringAndSize (NULL , nsize * 2 );
6450- if (v == NULL ) {
6431+ Py_ssize_t nsize = len + pairs + (byteorder == 0 );
6432+
6433+ #if PY_BIG_ENDIAN
6434+ int native_ordering = byteorder >= 0 ;
6435+ #else
6436+ int native_ordering = byteorder <= 0 ;
6437+ #endif
6438+
6439+ if (kind == PyUnicode_1BYTE_KIND ) {
6440+ PyObject * v = PyBytes_FromStringAndSize (NULL , nsize * 2 );
6441+ if (v == NULL ) {
6442+ return NULL ;
6443+ }
6444+
6445+ /* output buffer is 2-bytes aligned */
6446+ assert (_Py_IS_ALIGNED (PyBytes_AS_STRING (v ), 2 ));
6447+ unsigned short * out = (unsigned short * )PyBytes_AS_STRING (v );
6448+ if (byteorder == 0 ) {
6449+ * out ++ = 0xFEFF ;
6450+ }
6451+ if (len > 0 ) {
6452+ ucs1lib_utf16_encode ((const Py_UCS1 * )data , len , & out , native_ordering );
6453+ }
6454+ return v ;
6455+ }
6456+
6457+ PyBytesWriter * writer = PyBytesWriter_Create (nsize * 2 );
6458+ if (writer == NULL ) {
64516459 return NULL ;
64526460 }
64536461
64546462 /* output buffer is 2-bytes aligned */
6455- assert (_Py_IS_ALIGNED (PyBytes_AS_STRING ( v ), 2 ));
6456- out = ( unsigned short * ) PyBytes_AS_STRING ( v );
6463+ assert (_Py_IS_ALIGNED (PyBytesWriter_GetData ( writer ), 2 ));
6464+ unsigned short * out = PyBytesWriter_GetData ( writer );
64576465 if (byteorder == 0 ) {
64586466 * out ++ = 0xFEFF ;
64596467 }
64606468 if (len == 0 ) {
6461- goto done ;
6462- }
6463-
6464- if (kind == PyUnicode_1BYTE_KIND ) {
6465- ucs1lib_utf16_encode ((const Py_UCS1 * )data , len , & out , native_ordering );
6466- goto done ;
6469+ return PyBytesWriter_Finish (writer );
64676470 }
64686471
6472+ const char * encoding ;
64696473 if (byteorder < 0 ) {
64706474 encoding = "utf-16-le" ;
64716475 }
@@ -6476,10 +6480,11 @@ _PyUnicode_EncodeUTF16(PyObject *str,
64766480 encoding = "utf-16" ;
64776481 }
64786482
6479- pos = 0 ;
6480- while ( pos < len ) {
6481- Py_ssize_t newpos , repsize , moreunits ;
6483+ PyObject * errorHandler = NULL ;
6484+ PyObject * exc = NULL ;
6485+ PyObject * rep = NULL ;
64826486
6487+ for (Py_ssize_t pos = 0 ; pos < len ; ) {
64836488 if (kind == PyUnicode_2BYTE_KIND ) {
64846489 pos += ucs2lib_utf16_encode ((const Py_UCS2 * )data + pos , len - pos ,
64856490 & out , native_ordering );
@@ -6492,13 +6497,15 @@ _PyUnicode_EncodeUTF16(PyObject *str,
64926497 if (pos == len )
64936498 break ;
64946499
6500+ Py_ssize_t newpos ;
64956501 rep = unicode_encode_call_errorhandler (
64966502 errors , & errorHandler ,
64976503 encoding , "surrogates not allowed" ,
64986504 str , & exc , pos , pos + 1 , & newpos );
64996505 if (!rep )
65006506 goto error ;
65016507
6508+ Py_ssize_t repsize , moreunits ;
65026509 if (PyBytes_Check (rep )) {
65036510 repsize = PyBytes_GET_SIZE (rep );
65046511 if (repsize & 1 ) {
@@ -6524,21 +6531,17 @@ _PyUnicode_EncodeUTF16(PyObject *str,
65246531
65256532 /* two bytes are reserved for each surrogate */
65266533 if (moreunits > 0 ) {
6527- Py_ssize_t outpos = out - (unsigned short * ) PyBytes_AS_STRING (v );
6528- if (moreunits >= (PY_SSIZE_T_MAX - PyBytes_GET_SIZE (v )) / 2 ) {
6529- /* integer overflow */
6530- PyErr_NoMemory ();
6534+ out = PyBytesWriter_GrowAndUpdatePointer (writer , 2 * moreunits , out );
6535+ if (out == NULL ) {
65316536 goto error ;
65326537 }
6533- if (_PyBytes_Resize (& v , PyBytes_GET_SIZE (v ) + 2 * moreunits ) < 0 )
6534- goto error ;
6535- out = (unsigned short * ) PyBytes_AS_STRING (v ) + outpos ;
65366538 }
65376539
65386540 if (PyBytes_Check (rep )) {
65396541 memcpy (out , PyBytes_AS_STRING (rep ), repsize );
65406542 out += repsize / 2 ;
6541- } else /* rep is unicode */ {
6543+ } else {
6544+ /* rep is unicode */
65426545 assert (PyUnicode_KIND (rep ) == PyUnicode_1BYTE_KIND );
65436546 ucs1lib_utf16_encode (PyUnicode_1BYTE_DATA (rep ), repsize ,
65446547 & out , native_ordering );
@@ -6547,23 +6550,20 @@ _PyUnicode_EncodeUTF16(PyObject *str,
65476550 Py_CLEAR (rep );
65486551 }
65496552
6553+ Py_XDECREF (errorHandler );
6554+ Py_XDECREF (exc );
6555+
65506556 /* Cut back to size actually needed. This is necessary for, for example,
65516557 encoding of a string containing isolated surrogates and the 'ignore' handler
65526558 is used. */
6553- nsize = (unsigned char * ) out - (unsigned char * ) PyBytes_AS_STRING (v );
6554- if (nsize != PyBytes_GET_SIZE (v ))
6555- _PyBytes_Resize (& v , nsize );
6556- Py_XDECREF (errorHandler );
6557- Py_XDECREF (exc );
6558- done :
6559- return v ;
6559+ return PyBytesWriter_FinishWithPointer (writer , out );
6560+
65606561 error :
65616562 Py_XDECREF (rep );
65626563 Py_XDECREF (errorHandler );
65636564 Py_XDECREF (exc );
6564- Py_XDECREF ( v );
6565+ PyBytesWriter_Discard ( writer );
65656566 return NULL ;
6566- #undef STORECHAR
65676567}
65686568
65696569PyObject *
0 commit comments