@@ -6407,32 +6407,15 @@ _PyUnicode_EncodeUTF16(PyObject *str,
6407
6407
const char * errors ,
6408
6408
int byteorder )
6409
6409
{
6410
- int kind ;
6411
- const void * data ;
6412
- Py_ssize_t len ;
6413
- PyObject * v ;
6414
- unsigned short * out ;
6415
- Py_ssize_t pairs ;
6416
- #if PY_BIG_ENDIAN
6417
- int native_ordering = byteorder >= 0 ;
6418
- #else
6419
- int native_ordering = byteorder <= 0 ;
6420
- #endif
6421
- const char * encoding ;
6422
- Py_ssize_t nsize , pos ;
6423
- PyObject * errorHandler = NULL ;
6424
- PyObject * exc = NULL ;
6425
- PyObject * rep = NULL ;
6426
-
6427
6410
if (!PyUnicode_Check (str )) {
6428
6411
PyErr_BadArgument ();
6429
6412
return NULL ;
6430
6413
}
6431
- kind = PyUnicode_KIND (str );
6432
- data = PyUnicode_DATA (str );
6433
- len = PyUnicode_GET_LENGTH (str );
6414
+ int kind = PyUnicode_KIND (str );
6415
+ const void * data = PyUnicode_DATA (str );
6416
+ Py_ssize_t len = PyUnicode_GET_LENGTH (str );
6434
6417
6435
- pairs = 0 ;
6418
+ Py_ssize_t pairs = 0 ;
6436
6419
if (kind == PyUnicode_4BYTE_KIND ) {
6437
6420
const Py_UCS4 * in = (const Py_UCS4 * )data ;
6438
6421
const Py_UCS4 * end = in + len ;
@@ -6445,27 +6428,48 @@ _PyUnicode_EncodeUTF16(PyObject *str,
6445
6428
if (len > PY_SSIZE_T_MAX / 2 - pairs - (byteorder == 0 )) {
6446
6429
return PyErr_NoMemory ();
6447
6430
}
6448
- nsize = len + pairs + (byteorder == 0 );
6449
- v = PyBytes_FromStringAndSize (NULL , nsize * 2 );
6450
- if (v == NULL ) {
6431
+ Py_ssize_t nsize = len + pairs + (byteorder == 0 );
6432
+
6433
+ #if PY_BIG_ENDIAN
6434
+ int native_ordering = byteorder >= 0 ;
6435
+ #else
6436
+ int native_ordering = byteorder <= 0 ;
6437
+ #endif
6438
+
6439
+ if (kind == PyUnicode_1BYTE_KIND ) {
6440
+ PyObject * v = PyBytes_FromStringAndSize (NULL , nsize * 2 );
6441
+ if (v == NULL ) {
6442
+ return NULL ;
6443
+ }
6444
+
6445
+ /* output buffer is 2-bytes aligned */
6446
+ assert (_Py_IS_ALIGNED (PyBytes_AS_STRING (v ), 2 ));
6447
+ unsigned short * out = (unsigned short * )PyBytes_AS_STRING (v );
6448
+ if (byteorder == 0 ) {
6449
+ * out ++ = 0xFEFF ;
6450
+ }
6451
+ if (len > 0 ) {
6452
+ ucs1lib_utf16_encode ((const Py_UCS1 * )data , len , & out , native_ordering );
6453
+ }
6454
+ return v ;
6455
+ }
6456
+
6457
+ PyBytesWriter * writer = PyBytesWriter_Create (nsize * 2 );
6458
+ if (writer == NULL ) {
6451
6459
return NULL ;
6452
6460
}
6453
6461
6454
6462
/* output buffer is 2-bytes aligned */
6455
- assert (_Py_IS_ALIGNED (PyBytes_AS_STRING ( v ), 2 ));
6456
- out = ( unsigned short * ) PyBytes_AS_STRING ( v );
6463
+ assert (_Py_IS_ALIGNED (PyBytesWriter_GetData ( writer ), 2 ));
6464
+ unsigned short * out = PyBytesWriter_GetData ( writer );
6457
6465
if (byteorder == 0 ) {
6458
6466
* out ++ = 0xFEFF ;
6459
6467
}
6460
6468
if (len == 0 ) {
6461
- goto done ;
6462
- }
6463
-
6464
- if (kind == PyUnicode_1BYTE_KIND ) {
6465
- ucs1lib_utf16_encode ((const Py_UCS1 * )data , len , & out , native_ordering );
6466
- goto done ;
6469
+ return PyBytesWriter_Finish (writer );
6467
6470
}
6468
6471
6472
+ const char * encoding ;
6469
6473
if (byteorder < 0 ) {
6470
6474
encoding = "utf-16-le" ;
6471
6475
}
@@ -6476,10 +6480,11 @@ _PyUnicode_EncodeUTF16(PyObject *str,
6476
6480
encoding = "utf-16" ;
6477
6481
}
6478
6482
6479
- pos = 0 ;
6480
- while ( pos < len ) {
6481
- Py_ssize_t newpos , repsize , moreunits ;
6483
+ PyObject * errorHandler = NULL ;
6484
+ PyObject * exc = NULL ;
6485
+ PyObject * rep = NULL ;
6482
6486
6487
+ for (Py_ssize_t pos = 0 ; pos < len ; ) {
6483
6488
if (kind == PyUnicode_2BYTE_KIND ) {
6484
6489
pos += ucs2lib_utf16_encode ((const Py_UCS2 * )data + pos , len - pos ,
6485
6490
& out , native_ordering );
@@ -6492,13 +6497,15 @@ _PyUnicode_EncodeUTF16(PyObject *str,
6492
6497
if (pos == len )
6493
6498
break ;
6494
6499
6500
+ Py_ssize_t newpos ;
6495
6501
rep = unicode_encode_call_errorhandler (
6496
6502
errors , & errorHandler ,
6497
6503
encoding , "surrogates not allowed" ,
6498
6504
str , & exc , pos , pos + 1 , & newpos );
6499
6505
if (!rep )
6500
6506
goto error ;
6501
6507
6508
+ Py_ssize_t repsize , moreunits ;
6502
6509
if (PyBytes_Check (rep )) {
6503
6510
repsize = PyBytes_GET_SIZE (rep );
6504
6511
if (repsize & 1 ) {
@@ -6524,21 +6531,17 @@ _PyUnicode_EncodeUTF16(PyObject *str,
6524
6531
6525
6532
/* two bytes are reserved for each surrogate */
6526
6533
if (moreunits > 0 ) {
6527
- Py_ssize_t outpos = out - (unsigned short * ) PyBytes_AS_STRING (v );
6528
- if (moreunits >= (PY_SSIZE_T_MAX - PyBytes_GET_SIZE (v )) / 2 ) {
6529
- /* integer overflow */
6530
- PyErr_NoMemory ();
6534
+ out = PyBytesWriter_GrowAndUpdatePointer (writer , 2 * moreunits , out );
6535
+ if (out == NULL ) {
6531
6536
goto error ;
6532
6537
}
6533
- if (_PyBytes_Resize (& v , PyBytes_GET_SIZE (v ) + 2 * moreunits ) < 0 )
6534
- goto error ;
6535
- out = (unsigned short * ) PyBytes_AS_STRING (v ) + outpos ;
6536
6538
}
6537
6539
6538
6540
if (PyBytes_Check (rep )) {
6539
6541
memcpy (out , PyBytes_AS_STRING (rep ), repsize );
6540
6542
out += repsize / 2 ;
6541
- } else /* rep is unicode */ {
6543
+ } else {
6544
+ /* rep is unicode */
6542
6545
assert (PyUnicode_KIND (rep ) == PyUnicode_1BYTE_KIND );
6543
6546
ucs1lib_utf16_encode (PyUnicode_1BYTE_DATA (rep ), repsize ,
6544
6547
& out , native_ordering );
@@ -6547,23 +6550,20 @@ _PyUnicode_EncodeUTF16(PyObject *str,
6547
6550
Py_CLEAR (rep );
6548
6551
}
6549
6552
6553
+ Py_XDECREF (errorHandler );
6554
+ Py_XDECREF (exc );
6555
+
6550
6556
/* Cut back to size actually needed. This is necessary for, for example,
6551
6557
encoding of a string containing isolated surrogates and the 'ignore' handler
6552
6558
is used. */
6553
- nsize = (unsigned char * ) out - (unsigned char * ) PyBytes_AS_STRING (v );
6554
- if (nsize != PyBytes_GET_SIZE (v ))
6555
- _PyBytes_Resize (& v , nsize );
6556
- Py_XDECREF (errorHandler );
6557
- Py_XDECREF (exc );
6558
- done :
6559
- return v ;
6559
+ return PyBytesWriter_FinishWithPointer (writer , out );
6560
+
6560
6561
error :
6561
6562
Py_XDECREF (rep );
6562
6563
Py_XDECREF (errorHandler );
6563
6564
Py_XDECREF (exc );
6564
- Py_XDECREF ( v );
6565
+ PyBytesWriter_Discard ( writer );
6565
6566
return NULL ;
6566
- #undef STORECHAR
6567
6567
}
6568
6568
6569
6569
PyObject *
0 commit comments