Skip to content
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
104 changes: 52 additions & 52 deletions Objects/unicodeobject.c
Original file line number Diff line number Diff line change
Expand Up @@ -6406,32 +6406,15 @@ _PyUnicode_EncodeUTF16(PyObject *str,
const char *errors,
int byteorder)
{
int kind;
const void *data;
Py_ssize_t len;
PyObject *v;
unsigned short *out;
Py_ssize_t pairs;
#if PY_BIG_ENDIAN
int native_ordering = byteorder >= 0;
#else
int native_ordering = byteorder <= 0;
#endif
const char *encoding;
Py_ssize_t nsize, pos;
PyObject *errorHandler = NULL;
PyObject *exc = NULL;
PyObject *rep = NULL;

if (!PyUnicode_Check(str)) {
PyErr_BadArgument();
return NULL;
}
kind = PyUnicode_KIND(str);
data = PyUnicode_DATA(str);
len = PyUnicode_GET_LENGTH(str);
int kind = PyUnicode_KIND(str);
const void *data = PyUnicode_DATA(str);
Py_ssize_t len = PyUnicode_GET_LENGTH(str);

pairs = 0;
Py_ssize_t pairs = 0;
if (kind == PyUnicode_4BYTE_KIND) {
const Py_UCS4 *in = (const Py_UCS4 *)data;
const Py_UCS4 *end = in + len;
Expand All @@ -6444,27 +6427,48 @@ _PyUnicode_EncodeUTF16(PyObject *str,
if (len > PY_SSIZE_T_MAX / 2 - pairs - (byteorder == 0)) {
return PyErr_NoMemory();
}
nsize = len + pairs + (byteorder == 0);
v = PyBytes_FromStringAndSize(NULL, nsize * 2);
if (v == NULL) {
Py_ssize_t nsize = len + pairs + (byteorder == 0);

#if PY_BIG_ENDIAN
int native_ordering = byteorder >= 0;
#else
int native_ordering = byteorder <= 0;
#endif

if (kind == PyUnicode_1BYTE_KIND) {
PyObject *v = PyBytes_FromStringAndSize(NULL, nsize * 2);
if (v == NULL) {
return NULL;
}

/* output buffer is 2-byte aligned */
assert(_Py_IS_ALIGNED(PyBytes_AS_STRING(v), 2));
unsigned short *out = (unsigned short *)PyBytes_AS_STRING(v);
if (byteorder == 0) {
*out++ = 0xFEFF;
}
if (len > 0) {
ucs1lib_utf16_encode((const Py_UCS1 *)data, len, &out, native_ordering);
}
return v;
}

PyBytesWriter *writer = PyBytesWriter_Create(nsize * 2);
if (writer == NULL) {
return NULL;
}

/* output buffer is 2-byte aligned */
assert(_Py_IS_ALIGNED(PyBytes_AS_STRING(v), 2));
out = (unsigned short *)PyBytes_AS_STRING(v);
assert(_Py_IS_ALIGNED(PyBytesWriter_GetData(writer), 2));
unsigned short *out = PyBytesWriter_GetData(writer);
if (byteorder == 0) {
*out++ = 0xFEFF;
}
if (len == 0) {
goto done;
}

if (kind == PyUnicode_1BYTE_KIND) {
ucs1lib_utf16_encode((const Py_UCS1 *)data, len, &out, native_ordering);
goto done;
return PyBytesWriter_Finish(writer);
}

const char *encoding;
if (byteorder < 0) {
encoding = "utf-16-le";
}
Expand All @@ -6475,10 +6479,11 @@ _PyUnicode_EncodeUTF16(PyObject *str,
encoding = "utf-16";
}

pos = 0;
while (pos < len) {
Py_ssize_t newpos, repsize, moreunits;
PyObject *errorHandler = NULL;
PyObject *exc = NULL;
PyObject *rep = NULL;

for (Py_ssize_t pos = 0; pos < len; ) {
if (kind == PyUnicode_2BYTE_KIND) {
pos += ucs2lib_utf16_encode((const Py_UCS2 *)data + pos, len - pos,
&out, native_ordering);
Expand All @@ -6491,13 +6496,15 @@ _PyUnicode_EncodeUTF16(PyObject *str,
if (pos == len)
break;

Py_ssize_t newpos;
rep = unicode_encode_call_errorhandler(
errors, &errorHandler,
encoding, "surrogates not allowed",
str, &exc, pos, pos + 1, &newpos);
if (!rep)
goto error;

Py_ssize_t repsize, moreunits;
if (PyBytes_Check(rep)) {
repsize = PyBytes_GET_SIZE(rep);
if (repsize & 1) {
Expand All @@ -6523,21 +6530,17 @@ _PyUnicode_EncodeUTF16(PyObject *str,

/* two bytes are reserved for each surrogate */
if (moreunits > 0) {
Py_ssize_t outpos = out - (unsigned short*) PyBytes_AS_STRING(v);
if (moreunits >= (PY_SSIZE_T_MAX - PyBytes_GET_SIZE(v)) / 2) {
/* integer overflow */
PyErr_NoMemory();
out = PyBytesWriter_GrowAndUpdatePointer(writer, 2 * moreunits, out);
if (out == NULL) {
goto error;
}
if (_PyBytes_Resize(&v, PyBytes_GET_SIZE(v) + 2 * moreunits) < 0)
goto error;
out = (unsigned short*) PyBytes_AS_STRING(v) + outpos;
}

if (PyBytes_Check(rep)) {
memcpy(out, PyBytes_AS_STRING(rep), repsize);
out += repsize / 2;
} else /* rep is unicode */ {
} else {
/* rep is unicode */
assert(PyUnicode_KIND(rep) == PyUnicode_1BYTE_KIND);
ucs1lib_utf16_encode(PyUnicode_1BYTE_DATA(rep), repsize,
&out, native_ordering);
Expand All @@ -6546,23 +6549,20 @@ _PyUnicode_EncodeUTF16(PyObject *str,
Py_CLEAR(rep);
}

Py_XDECREF(errorHandler);
Py_XDECREF(exc);

/* Cut back to the size actually needed. This is necessary, for example,
when encoding a string containing isolated surrogates with the 'ignore'
error handler. */
nsize = (unsigned char*) out - (unsigned char*) PyBytes_AS_STRING(v);
if (nsize != PyBytes_GET_SIZE(v))
_PyBytes_Resize(&v, nsize);
Py_XDECREF(errorHandler);
Py_XDECREF(exc);
done:
return v;
return PyBytesWriter_FinishWithPointer(writer, out);

error:
Py_XDECREF(rep);
Py_XDECREF(errorHandler);
Py_XDECREF(exc);
Py_XDECREF(v);
PyBytesWriter_Discard(writer);
return NULL;
#undef STORECHAR
}

PyObject *
Expand Down
Loading