Skip to content

Commit d78e395

Browse files
committed
create utility for creating a string with N copies of 0xFFFD
1 parent aaa1e4a commit d78e395

File tree

1 file changed

+27
-9
lines changed

1 file changed

+27
-9
lines changed

Python/codecs.c

Lines changed: 27 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -730,6 +730,27 @@ codec_handler_write_unicode_hex(Py_UCS1 **p, Py_UCS4 ch)
730730
}
731731

732732

733+
/*
734+
* Create a Unicode string containing 'count' copies of the official
735+
* Unicode REPLACEMENT CHARACTER (0xFFFD).
736+
*/
737+
static PyObject *
738+
codec_handler_unicode_replacement_character(Py_ssize_t count)
739+
{
740+
PyObject *res = PyUnicode_New(count, Py_UNICODE_REPLACEMENT_CHARACTER);
741+
if (res == NULL) {
742+
return NULL;
743+
}
744+
assert(count == 0 || PyUnicode_KIND(res) == PyUnicode_2BYTE_KIND);
745+
Py_UCS2 *outp = PyUnicode_2BYTE_DATA(res);
746+
for (Py_ssize_t i = 0; i < count; ++i) {
747+
outp[i] = Py_UNICODE_REPLACEMENT_CHARACTER;
748+
}
749+
assert(_PyUnicode_CheckConsistency(res, 1));
750+
return res;
751+
}
752+
753+
733754
// --- handler: 'strict' ------------------------------------------------------
734755

735756
PyObject *PyCodec_StrictErrors(PyObject *exc)
@@ -804,8 +825,11 @@ _PyCodec_ReplaceUnicodeDecodeError(PyObject *exc)
804825
if (PyUnicodeDecodeError_GetEnd(exc, &end) < 0) {
805826
return NULL;
806827
}
807-
// Note: Py_UNICODE_REPLACEMENT_CHARACTER < (2 ** 16) < INT_MAX
808-
return Py_BuildValue("(Cn)", (int)Py_UNICODE_REPLACEMENT_CHARACTER, end);
828+
PyObject *res = codec_handler_unicode_replacement_character(1);
829+
if (res == NULL) {
830+
return NULL;
831+
}
832+
return Py_BuildValue("(Nn)", res, end);
809833
}
810834

811835

@@ -818,16 +842,10 @@ _PyCodec_ReplaceUnicodeTranslateError(PyObject *exc)
818842
{
819843
return NULL;
820844
}
821-
PyObject *res = PyUnicode_New(slen, Py_UNICODE_REPLACEMENT_CHARACTER);
845+
PyObject *res = codec_handler_unicode_replacement_character(slen);
822846
if (res == NULL) {
823847
return NULL;
824848
}
825-
assert(slen == 0 || PyUnicode_KIND(res) == PyUnicode_2BYTE_KIND);
826-
Py_UCS2 *outp = PyUnicode_2BYTE_DATA(res);
827-
for (Py_ssize_t i = 0; i < slen; ++i) {
828-
outp[i] = Py_UNICODE_REPLACEMENT_CHARACTER;
829-
}
830-
assert(_PyUnicode_CheckConsistency(res, 1));
831849
return Py_BuildValue("(Nn)", res, end);
832850
}
833851

0 commit comments

Comments
 (0)