Skip to content

Commit eb2237a

Browse files
committed
Use new helpers in the replace handler.
We also refactor that handler and extract the logic for each exception being handled into separate functions.
1 parent d5796e6 commit eb2237a

File tree

1 file changed

+63
-40
lines changed

1 file changed

+63
-40
lines changed

Python/codecs.c

Lines changed: 63 additions & 40 deletions
Original file line number | Diff line number | Diff line change
@@ -774,50 +774,72 @@ PyObject *PyCodec_IgnoreErrors(PyObject *exc)
774774
}
775775

776776

777-
PyObject *PyCodec_ReplaceErrors(PyObject *exc)
777+
// --- handler: 'replace' -----------------------------------------------------
778+
779+
static PyObject *
780+
_PyCodec_ReplaceUnicodeEncodeError(PyObject *exc)
778781
{
779782
Py_ssize_t start, end, slen;
783+
if (_PyUnicodeError_GetParams(exc, NULL, NULL,
784+
&start, &end, &slen, false) < 0) {
785+
return NULL;
786+
}
787+
PyObject *res = PyUnicode_New(slen, '?');
788+
if (res == NULL) {
789+
return NULL;
790+
}
791+
assert(PyUnicode_KIND(res) == PyUnicode_1BYTE_KIND);
792+
Py_UCS1 *outp = PyUnicode_1BYTE_DATA(res);
793+
memset(outp, '?', sizeof(Py_UCS1) * slen);
794+
assert(_PyUnicode_CheckConsistency(res, 1));
795+
return Py_BuildValue("(Nn)", res, end);
796+
}
780797

781-
if (PyObject_TypeCheck(exc, (PyTypeObject *)PyExc_UnicodeEncodeError)) {
782-
if (_PyUnicodeError_GetParams(exc, NULL, NULL,
783-
&start, &end, &slen, false) < 0) {
784-
return NULL;
785-
}
786-
PyObject *res = PyUnicode_New(slen, '?');
787-
if (res == NULL) {
788-
return NULL;
789-
}
790-
assert(PyUnicode_KIND(res) == PyUnicode_1BYTE_KIND);
791-
Py_UCS1 *outp = PyUnicode_1BYTE_DATA(res);
792-
memset(outp, '?', sizeof(Py_UCS1) * slen);
793-
assert(_PyUnicode_CheckConsistency(res, 1));
794-
return Py_BuildValue("(Nn)", res, end);
798+
799+
static PyObject *
800+
_PyCodec_ReplaceUnicodeDecodeError(PyObject *exc)
801+
{
802+
Py_ssize_t end;
803+
if (PyUnicodeDecodeError_GetEnd(exc, &end) < 0) {
804+
return NULL;
795805
}
796-
else if (PyObject_TypeCheck(exc, (PyTypeObject *)PyExc_UnicodeDecodeError)) {
797-
if (_PyUnicodeError_GetParams(exc, NULL, NULL,
798-
NULL, &end, NULL, true) < 0) {
799-
return NULL;
800-
}
801-
return Py_BuildValue("(Cn)",
802-
(int)Py_UNICODE_REPLACEMENT_CHARACTER,
803-
end);
806+
// Note: Py_UNICODE_REPLACEMENT_CHARACTER < (2 ** 16) < INT_MAX
807+
return Py_BuildValue("(Cn)", (int)Py_UNICODE_REPLACEMENT_CHARACTER, end);
808+
}
809+
810+
811+
static PyObject *
812+
_PyCodec_ReplaceUnicodeTranslateError(PyObject *exc)
813+
{
814+
Py_ssize_t start, end, slen;
815+
if (_PyUnicodeError_GetParams(exc, NULL, NULL,
816+
&start, &end, &slen, false) < 0) {
817+
return NULL;
804818
}
805-
else if (PyObject_TypeCheck(exc, (PyTypeObject *)PyExc_UnicodeTranslateError)) {
806-
if (_PyUnicodeError_GetParams(exc, NULL, NULL,
807-
&start, &end, &slen, false) < 0) {
808-
return NULL;
809-
}
810-
PyObject *res = PyUnicode_New(slen, Py_UNICODE_REPLACEMENT_CHARACTER);
811-
if (res == NULL) {
812-
return NULL;
813-
}
814-
assert(slen == 0 || PyUnicode_KIND(res) == PyUnicode_2BYTE_KIND);
815-
Py_UCS2 *outp = PyUnicode_2BYTE_DATA(res);
816-
for (Py_ssize_t i = 0; i < slen; ++i) {
817-
outp[i] = Py_UNICODE_REPLACEMENT_CHARACTER;
818-
}
819-
assert(_PyUnicode_CheckConsistency(res, 1));
820-
return Py_BuildValue("(Nn)", res, end);
819+
PyObject *res = PyUnicode_New(slen, Py_UNICODE_REPLACEMENT_CHARACTER);
820+
if (res == NULL) {
821+
return NULL;
822+
}
823+
assert(slen == 0 || PyUnicode_KIND(res) == PyUnicode_2BYTE_KIND);
824+
Py_UCS2 *outp = PyUnicode_2BYTE_DATA(res);
825+
for (Py_ssize_t i = 0; i < slen; ++i) {
826+
outp[i] = Py_UNICODE_REPLACEMENT_CHARACTER;
827+
}
828+
assert(_PyUnicode_CheckConsistency(res, 1));
829+
return Py_BuildValue("(Nn)", res, end);
830+
}
831+
832+
833+
PyObject *PyCodec_ReplaceErrors(PyObject *exc)
834+
{
835+
if (_PyIsUnicodeEncodeError(exc)) {
836+
return _PyCodec_ReplaceUnicodeEncodeError(exc);
837+
}
838+
else if (_PyIsUnicodeDecodeError(exc)) {
839+
return _PyCodec_ReplaceUnicodeDecodeError(exc);
840+
}
841+
else if (_PyIsUnicodeTranslateError(exc)) {
842+
return _PyCodec_ReplaceUnicodeTranslateError(exc);
821843
}
822844
else {
823845
wrong_exception_type(exc);
@@ -1413,7 +1435,8 @@ ignore_errors(PyObject *Py_UNUSED(self), PyObject *exc)
14131435
}
14141436

14151437

1416-
static PyObject *replace_errors(PyObject *self, PyObject *exc)
1438+
static inline PyObject *
1439+
replace_errors(PyObject *Py_UNUSED(self), PyObject *exc)
14171440
{
14181441
return PyCodec_ReplaceErrors(exc);
14191442
}

0 commit comments

Comments
 (0)