Skip to content

Commit 067f277

Browse files
committed
gh-139156: Use PyBytesWriter in UTF-16 encoder
Replace PyBytes_FromStringAndSize() and _PyBytes_Resize() with the PyBytesWriter API.
1 parent a756a4b commit 067f277

File tree

1 file changed

+52
-52
lines changed

1 file changed

+52
-52
lines changed

Objects/unicodeobject.c

Lines changed: 52 additions & 52 deletions
Original file line number | Diff line number | Diff line change
@@ -6406,32 +6406,15 @@ _PyUnicode_EncodeUTF16(PyObject *str,
64066406
const char *errors,
64076407
int byteorder)
64086408
{
6409-
int kind;
6410-
const void *data;
6411-
Py_ssize_t len;
6412-
PyObject *v;
6413-
unsigned short *out;
6414-
Py_ssize_t pairs;
6415-
#if PY_BIG_ENDIAN
6416-
int native_ordering = byteorder >= 0;
6417-
#else
6418-
int native_ordering = byteorder <= 0;
6419-
#endif
6420-
const char *encoding;
6421-
Py_ssize_t nsize, pos;
6422-
PyObject *errorHandler = NULL;
6423-
PyObject *exc = NULL;
6424-
PyObject *rep = NULL;
6425-
64266409
if (!PyUnicode_Check(str)) {
64276410
PyErr_BadArgument();
64286411
return NULL;
64296412
}
6430-
kind = PyUnicode_KIND(str);
6431-
data = PyUnicode_DATA(str);
6432-
len = PyUnicode_GET_LENGTH(str);
6413+
int kind = PyUnicode_KIND(str);
6414+
const void *data = PyUnicode_DATA(str);
6415+
Py_ssize_t len = PyUnicode_GET_LENGTH(str);
64336416

6434-
pairs = 0;
6417+
Py_ssize_t pairs = 0;
64356418
if (kind == PyUnicode_4BYTE_KIND) {
64366419
const Py_UCS4 *in = (const Py_UCS4 *)data;
64376420
const Py_UCS4 *end = in + len;
@@ -6444,27 +6427,48 @@ _PyUnicode_EncodeUTF16(PyObject *str,
64446427
if (len > PY_SSIZE_T_MAX / 2 - pairs - (byteorder == 0)) {
64456428
return PyErr_NoMemory();
64466429
}
6447-
nsize = len + pairs + (byteorder == 0);
6448-
v = PyBytes_FromStringAndSize(NULL, nsize * 2);
6449-
if (v == NULL) {
6430+
Py_ssize_t nsize = len + pairs + (byteorder == 0);
6431+
6432+
#if PY_BIG_ENDIAN
6433+
int native_ordering = byteorder >= 0;
6434+
#else
6435+
int native_ordering = byteorder <= 0;
6436+
#endif
6437+
6438+
if (kind == PyUnicode_1BYTE_KIND) {
6439+
PyObject *v = PyBytes_FromStringAndSize(NULL, nsize * 2);
6440+
if (v == NULL) {
6441+
return NULL;
6442+
}
6443+
6444+
/* output buffer is 2-bytes aligned */
6445+
assert(_Py_IS_ALIGNED(PyBytes_AS_STRING(v), 2));
6446+
unsigned short *out = (unsigned short *)PyBytes_AS_STRING(v);
6447+
if (byteorder == 0) {
6448+
*out++ = 0xFEFF;
6449+
}
6450+
if (len > 0) {
6451+
ucs1lib_utf16_encode((const Py_UCS1 *)data, len, &out, native_ordering);
6452+
}
6453+
return v;
6454+
}
6455+
6456+
PyBytesWriter *writer = PyBytesWriter_Create(nsize * 2);
6457+
if (writer == NULL) {
64506458
return NULL;
64516459
}
64526460

64536461
/* output buffer is 2-bytes aligned */
6454-
assert(_Py_IS_ALIGNED(PyBytes_AS_STRING(v), 2));
6455-
out = (unsigned short *)PyBytes_AS_STRING(v);
6462+
assert(_Py_IS_ALIGNED(PyBytesWriter_GetData(writer), 2));
6463+
unsigned short *out = PyBytesWriter_GetData(writer);
64566464
if (byteorder == 0) {
64576465
*out++ = 0xFEFF;
64586466
}
64596467
if (len == 0) {
6460-
goto done;
6461-
}
6462-
6463-
if (kind == PyUnicode_1BYTE_KIND) {
6464-
ucs1lib_utf16_encode((const Py_UCS1 *)data, len, &out, native_ordering);
6465-
goto done;
6468+
return PyBytesWriter_Finish(writer);
64666469
}
64676470

6471+
const char *encoding;
64686472
if (byteorder < 0) {
64696473
encoding = "utf-16-le";
64706474
}
@@ -6475,10 +6479,11 @@ _PyUnicode_EncodeUTF16(PyObject *str,
64756479
encoding = "utf-16";
64766480
}
64776481

6478-
pos = 0;
6479-
while (pos < len) {
6480-
Py_ssize_t newpos, repsize, moreunits;
6482+
PyObject *errorHandler = NULL;
6483+
PyObject *exc = NULL;
6484+
PyObject *rep = NULL;
64816485

6486+
for (Py_ssize_t pos = 0; pos < len; ) {
64826487
if (kind == PyUnicode_2BYTE_KIND) {
64836488
pos += ucs2lib_utf16_encode((const Py_UCS2 *)data + pos, len - pos,
64846489
&out, native_ordering);
@@ -6491,13 +6496,15 @@ _PyUnicode_EncodeUTF16(PyObject *str,
64916496
if (pos == len)
64926497
break;
64936498

6499+
Py_ssize_t newpos;
64946500
rep = unicode_encode_call_errorhandler(
64956501
errors, &errorHandler,
64966502
encoding, "surrogates not allowed",
64976503
str, &exc, pos, pos + 1, &newpos);
64986504
if (!rep)
64996505
goto error;
65006506

6507+
Py_ssize_t repsize, moreunits;
65016508
if (PyBytes_Check(rep)) {
65026509
repsize = PyBytes_GET_SIZE(rep);
65036510
if (repsize & 1) {
@@ -6523,21 +6530,17 @@ _PyUnicode_EncodeUTF16(PyObject *str,
65236530

65246531
/* two bytes are reserved for each surrogate */
65256532
if (moreunits > 0) {
6526-
Py_ssize_t outpos = out - (unsigned short*) PyBytes_AS_STRING(v);
6527-
if (moreunits >= (PY_SSIZE_T_MAX - PyBytes_GET_SIZE(v)) / 2) {
6528-
/* integer overflow */
6529-
PyErr_NoMemory();
6533+
out = PyBytesWriter_GrowAndUpdatePointer(writer, 2 * moreunits, out);
6534+
if (out == NULL) {
65306535
goto error;
65316536
}
6532-
if (_PyBytes_Resize(&v, PyBytes_GET_SIZE(v) + 2 * moreunits) < 0)
6533-
goto error;
6534-
out = (unsigned short*) PyBytes_AS_STRING(v) + outpos;
65356537
}
65366538

65376539
if (PyBytes_Check(rep)) {
65386540
memcpy(out, PyBytes_AS_STRING(rep), repsize);
65396541
out += repsize / 2;
6540-
} else /* rep is unicode */ {
6542+
} else {
6543+
/* rep is unicode */
65416544
assert(PyUnicode_KIND(rep) == PyUnicode_1BYTE_KIND);
65426545
ucs1lib_utf16_encode(PyUnicode_1BYTE_DATA(rep), repsize,
65436546
&out, native_ordering);
@@ -6546,23 +6549,20 @@ _PyUnicode_EncodeUTF16(PyObject *str,
65466549
Py_CLEAR(rep);
65476550
}
65486551

6552+
Py_XDECREF(errorHandler);
6553+
Py_XDECREF(exc);
6554+
65496555
/* Cut back to size actually needed. This is necessary for, for example,
65506556
encoding of a string containing isolated surrogates and the 'ignore' handler
65516557
is used. */
6552-
nsize = (unsigned char*) out - (unsigned char*) PyBytes_AS_STRING(v);
6553-
if (nsize != PyBytes_GET_SIZE(v))
6554-
_PyBytes_Resize(&v, nsize);
6555-
Py_XDECREF(errorHandler);
6556-
Py_XDECREF(exc);
6557-
done:
6558-
return v;
6558+
return PyBytesWriter_FinishWithPointer(writer, out);
6559+
65596560
error:
65606561
Py_XDECREF(rep);
65616562
Py_XDECREF(errorHandler);
65626563
Py_XDECREF(exc);
6563-
Py_XDECREF(v);
6564+
PyBytesWriter_Discard(writer);
65646565
return NULL;
6565-
#undef STORECHAR
65666566
}
65676567

65686568
PyObject *

0 commit comments

Comments
 (0)