@@ -730,6 +730,27 @@ codec_handler_write_unicode_hex(Py_UCS1 **p, Py_UCS4 ch)
730730}
731731
732732
733+ /*
734+ * Create a Unicode string containing 'count' copies of the official
735+ * Unicode REPLACEMENT CHARACTER (0xFFFD).
736+ */
737+ static PyObject *
738+ codec_handler_unicode_replacement_character (Py_ssize_t count )
739+ {
740+ PyObject * res = PyUnicode_New (count , Py_UNICODE_REPLACEMENT_CHARACTER );
741+ if (res == NULL ) {
742+ return NULL ;
743+ }
744+ assert (count == 0 || PyUnicode_KIND (res ) == PyUnicode_2BYTE_KIND );
745+ Py_UCS2 * outp = PyUnicode_2BYTE_DATA (res );
746+ for (Py_ssize_t i = 0 ; i < count ; ++ i ) {
747+ outp [i ] = Py_UNICODE_REPLACEMENT_CHARACTER ;
748+ }
749+ assert (_PyUnicode_CheckConsistency (res , 1 ));
750+ return res ;
751+ }
752+
753+
733754// --- handler: 'strict' ------------------------------------------------------
734755
735756PyObject * PyCodec_StrictErrors (PyObject * exc )
@@ -804,8 +825,11 @@ _PyCodec_ReplaceUnicodeDecodeError(PyObject *exc)
804825 if (PyUnicodeDecodeError_GetEnd (exc , & end ) < 0 ) {
805826 return NULL ;
806827 }
807- // Note: Py_UNICODE_REPLACEMENT_CHARACTER < (2 ** 16) < INT_MAX
808- return Py_BuildValue ("(Cn)" , (int )Py_UNICODE_REPLACEMENT_CHARACTER , end );
828+ PyObject * res = codec_handler_unicode_replacement_character (1 );
829+ if (res == NULL ) {
830+ return NULL ;
831+ }
832+ return Py_BuildValue ("(Nn)" , res , end );
809833}
810834
811835
@@ -818,16 +842,10 @@ _PyCodec_ReplaceUnicodeTranslateError(PyObject *exc)
818842 {
819843 return NULL ;
820844 }
821- PyObject * res = PyUnicode_New (slen , Py_UNICODE_REPLACEMENT_CHARACTER );
845+ PyObject * res = codec_handler_unicode_replacement_character (slen );
822846 if (res == NULL ) {
823847 return NULL ;
824848 }
825- assert (slen == 0 || PyUnicode_KIND (res ) == PyUnicode_2BYTE_KIND );
826- Py_UCS2 * outp = PyUnicode_2BYTE_DATA (res );
827- for (Py_ssize_t i = 0 ; i < slen ; ++ i ) {
828- outp [i ] = Py_UNICODE_REPLACEMENT_CHARACTER ;
829- }
830- assert (_PyUnicode_CheckConsistency (res , 1 ));
831849 return Py_BuildValue ("(Nn)" , res , end );
832850}
833851
0 commit comments