Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Fix :func:`locale.strxfrm` on Solaris (and possibly other platforms).
49 changes: 48 additions & 1 deletion Modules/_localemodule.c
Original file line number Diff line number Diff line change
Expand Up @@ -483,7 +483,54 @@ _locale_strxfrm_impl(PyObject *module, PyObject *str)
goto exit;
}
}
result = PyUnicode_FromWideChar(buf, n2);
/* The result is just a sequence of integers, they are not necessary
Unicode code points, so PyUnicode_FromWideChar() cannot be used
here. For example, 0xD83D 0xDC0D should not be larger than 0xFF41.
*/
#if SIZEOF_WCHAR_T == 4
{
/* Some codes can exceed the range of Unicode code points
(0 - 0x10FFFF), so they cannot be directly used in
PyUnicode_FromKindAndData(). They should be first encoded in
a way that preserves the lexicographical order.

Codes in the range 0-0xFFFF represent themself.
Codes larger than 0xFFFF are encoded as a pair:
* 0x1xxxx -- the highest 16 bits
* 0x0xxxx -- the lowest 16 bits
*/
size_t n3 = 0;
for (size_t i = 0; i < n2; i++) {
if ((Py_UCS4)buf[i] > 0x10000u) {
n3++;
}
}
if (n3) {
n3 += n2; // no integer overflow
Py_UCS4 *buf2 = PyMem_New(Py_UCS4, n3);
if (buf2 == NULL) {
PyErr_NoMemory();
goto exit;
}
size_t j = 0;
for (size_t i = 0; i < n2; i++) {
Py_UCS4 c = (Py_UCS4)buf[i];
if (c > 0x10000u) {
buf2[j++] = (c >> 16) | 0x10000u;
buf2[j++] = c & 0xFFFFu;
}
else {
buf2[j++] = c;
}
}
assert(j == n3);
result = PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND, buf2, n3);
PyMem_Free(buf2);
goto exit;
}
}
#endif
result = PyUnicode_FromKindAndData(sizeof(wchar_t), buf, n2);
exit:
PyMem_Free(buf);
PyMem_Free(s);
Expand Down
Loading