Skip to content

Commit f9ae3cd

Browse files
committed
Use _PyUnicodeError_GetParams for the backslashreplace handler.
We also refactor that handler and extract the logic for each handled exception type into a separate function.
1 parent d5796e6 commit f9ae3cd

File tree

1 file changed

+70
-41
lines changed

1 file changed

+70
-41
lines changed

Python/codecs.c

Lines changed: 70 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -936,49 +936,18 @@ PyObject *PyCodec_XMLCharRefReplaceErrors(PyObject *exc)
936936
return restuple;
937937
}
938938

939-
PyObject *PyCodec_BackslashReplaceErrors(PyObject *exc)
939+
940+
// --- handler: 'backslashreplace' --------------------------------------------
941+
942+
static PyObject *
943+
_PyCodec_BackslashReplaceUnicodeEncodeError(PyObject *exc)
940944
{
941945
PyObject *obj;
942946
Py_ssize_t objlen, start, end, slen;
943-
if (PyObject_TypeCheck(exc, (PyTypeObject *)PyExc_UnicodeDecodeError)) {
944-
if (_PyUnicodeError_GetParams(exc,
945-
&obj, &objlen,
946-
&start, &end, &slen, true) < 0)
947-
{
948-
return NULL;
949-
}
950-
PyObject *res = PyUnicode_New(4 * slen, 127);
951-
if (res == NULL) {
952-
Py_DECREF(obj);
953-
return NULL;
954-
}
955-
Py_UCS1 *outp = PyUnicode_1BYTE_DATA(res);
956-
const unsigned char *p = (const unsigned char *)PyBytes_AS_STRING(obj);
957-
for (Py_ssize_t i = start; i < end; i++, outp += 4) {
958-
const unsigned char ch = p[i];
959-
outp[0] = '\\';
960-
outp[1] = 'x';
961-
outp[2] = Py_hexdigits[(ch >> 4) & 0xf];
962-
outp[3] = Py_hexdigits[ch & 0xf];
963-
}
964-
assert(_PyUnicode_CheckConsistency(res, 1));
965-
Py_DECREF(obj);
966-
return Py_BuildValue("(Nn)", res, end);
967-
}
968-
969-
if (
970-
PyObject_TypeCheck(exc, (PyTypeObject *)PyExc_UnicodeEncodeError)
971-
|| PyObject_TypeCheck(exc, (PyTypeObject *)PyExc_UnicodeTranslateError)
972-
) {
973-
if (_PyUnicodeError_GetParams(exc,
974-
&obj, &objlen,
975-
&start, &end, &slen, false) < 0)
976-
{
977-
return NULL;
978-
}
979-
}
980-
else {
981-
wrong_exception_type(exc);
947+
if (_PyUnicodeError_GetParams(exc,
948+
&obj, &objlen,
949+
&start, &end, &slen, false) < 0)
950+
{
982951
return NULL;
983952
}
984953

@@ -1015,6 +984,65 @@ PyObject *PyCodec_BackslashReplaceErrors(PyObject *exc)
1015984
}
1016985

1017986

987+
// Build the 'backslashreplace' result for a decoding error.
//
// Every byte of the input object in the range [start, end) is rendered as
// the four ASCII characters "\xNN" (two lowercase-or-as-defined-by
// Py_hexdigits hexadecimal digits).  The result is the value produced by
// Py_BuildValue("(Nn)", replacement, end), or NULL if fetching the error
// parameters or allocating the replacement string fails.
static PyObject *
_PyCodec_BackslashReplaceUnicodeDecodeError(PyObject *exc)
{
    PyObject *input;
    Py_ssize_t input_len, first, stop, count;
    if (_PyUnicodeError_GetParams(exc, &input, &input_len,
                                  &first, &stop, &count, true) < 0)
    {
        return NULL;
    }

    // Four output characters per input byte; 127 restricts the new string
    // to the ASCII range, which is all that "\xNN" needs.
    PyObject *replacement = PyUnicode_New(4 * count, 127);
    if (replacement == NULL) {
        // 'input' was handed to us as an owned reference: release it.
        Py_DECREF(input);
        return NULL;
    }

    const unsigned char *bytes =
        (const unsigned char *)PyBytes_AS_STRING(input);
    Py_UCS1 *cursor = PyUnicode_1BYTE_DATA(replacement);
    Py_ssize_t pos = first;
    while (pos < stop) {
        const unsigned char byte = bytes[pos];
        cursor[0] = '\\';
        cursor[1] = 'x';
        cursor[2] = Py_hexdigits[(byte >> 4) & 0xf];  // high nibble
        cursor[3] = Py_hexdigits[byte & 0xf];         // low nibble
        cursor += 4;
        pos++;
    }
    assert(_PyUnicode_CheckConsistency(replacement, 1));
    Py_DECREF(input);
    // "N" passes 'replacement' to the tuple without creating a new
    // reference; "n" converts 'stop' from Py_ssize_t.
    return Py_BuildValue("(Nn)", replacement, stop);
}
1018+
1019+
1020+
// 'backslashreplace' for UnicodeTranslateError objects.
//
// Translation errors are handled exactly like encoding errors, so this
// simply forwards to the UnicodeEncodeError helper and returns its result.
static inline PyObject *
_PyCodec_BackslashReplaceUnicodeTranslateError(PyObject *exc)
{
    PyObject *result = _PyCodec_BackslashReplaceUnicodeEncodeError(exc);
    return result;
}
1026+
1027+
1028+
// Public entry point of the 'backslashreplace' error handler.
//
// Dispatches on the type of 'exc' and returns whatever the matching
// helper returns:
//   - UnicodeDecodeError    -> _PyCodec_BackslashReplaceUnicodeDecodeError
//   - UnicodeEncodeError    -> _PyCodec_BackslashReplaceUnicodeEncodeError
//   - UnicodeTranslateError -> _PyCodec_BackslashReplaceUnicodeTranslateError
// For any other object, wrong_exception_type() is called and NULL is
// returned.
PyObject *PyCodec_BackslashReplaceErrors(PyObject *exc)
{
    if (_PyIsUnicodeDecodeError(exc)) {
        return _PyCodec_BackslashReplaceUnicodeDecodeError(exc);
    }
    // This branch must test for UnicodeEncodeError: testing for
    // UnicodeDecodeError a second time would make it unreachable and
    // encoding errors would be rejected as a wrong exception type.
    if (_PyIsUnicodeEncodeError(exc)) {
        return _PyCodec_BackslashReplaceUnicodeEncodeError(exc);
    }
    if (_PyIsUnicodeTranslateError(exc)) {
        return _PyCodec_BackslashReplaceUnicodeTranslateError(exc);
    }
    wrong_exception_type(exc);
    return NULL;
}
1044+
1045+
10181046
// --- handler: 'namereplace' -------------------------------------------------
10191047

10201048
PyObject *PyCodec_NameReplaceErrors(PyObject *exc)
@@ -1425,7 +1453,8 @@ static PyObject *xmlcharrefreplace_errors(PyObject *self, PyObject *exc)
14251453
}
14261454

14271455

1428-
static PyObject *backslashreplace_errors(PyObject *self, PyObject *exc)
1456+
// Module-level wrapper around PyCodec_BackslashReplaceErrors().
//
// The first argument is unused; 'exc' is forwarded unchanged and the
// result of the public handler is returned as-is.
static inline PyObject *
backslashreplace_errors(PyObject *Py_UNUSED(self), PyObject *exc)
{
    PyObject *result = PyCodec_BackslashReplaceErrors(exc);
    return result;
}

0 commit comments

Comments
 (0)