@@ -730,6 +730,27 @@ codec_handler_write_unicode_hex(Py_UCS1 **p, Py_UCS4 ch)
730730}
731731
732732
733+ /* 
734+  * Create a Unicode string containing 'count' copies of the official 
735+  * Unicode REPLACEMENT CHARACTER (0xFFFD). 
736+  */ 
737+ static  PyObject  * 
738+ codec_handler_unicode_replacement_character (Py_ssize_t  count )
739+ {
740+     PyObject  * res  =  PyUnicode_New (count , Py_UNICODE_REPLACEMENT_CHARACTER );
741+     if  (res  ==  NULL ) {
742+         return  NULL ;
743+     }
744+     assert (count  ==  0  ||  PyUnicode_KIND (res ) ==  PyUnicode_2BYTE_KIND );
745+     Py_UCS2  * outp  =  PyUnicode_2BYTE_DATA (res );
746+     for  (Py_ssize_t  i  =  0 ; i  <  count ; ++ i ) {
747+         outp [i ] =  Py_UNICODE_REPLACEMENT_CHARACTER ;
748+     }
749+     assert (_PyUnicode_CheckConsistency (res , 1 ));
750+     return  res ;
751+ }
752+ 
753+ 
733754// --- handler: 'strict' ------------------------------------------------------ 
734755
735756PyObject  * PyCodec_StrictErrors (PyObject  * exc )
@@ -774,50 +795,71 @@ PyObject *PyCodec_IgnoreErrors(PyObject *exc)
774795}
775796
776797
777- PyObject  * PyCodec_ReplaceErrors (PyObject  * exc )
798+ // --- handler: 'replace' ----------------------------------------------------- 
799+ 
800+ static  PyObject  * 
801+ _PyCodec_ReplaceUnicodeEncodeError (PyObject  * exc )
778802{
779803    Py_ssize_t  start , end , slen ;
804+     if  (_PyUnicodeError_GetParams (exc , NULL , NULL ,
805+                                   & start , & end , & slen , false) <  0 )
806+     {
807+         return  NULL ;
808+     }
809+     PyObject  * res  =  PyUnicode_New (slen , '?' );
810+     if  (res  ==  NULL ) {
811+         return  NULL ;
812+     }
813+     assert (PyUnicode_KIND (res ) ==  PyUnicode_1BYTE_KIND );
814+     Py_UCS1  * outp  =  PyUnicode_1BYTE_DATA (res );
815+     memset (outp , '?' , sizeof (Py_UCS1 ) *  slen );
816+     assert (_PyUnicode_CheckConsistency (res , 1 ));
817+     return  Py_BuildValue ("(Nn)" , res , end );
818+ }
780819
781-     if  (PyObject_TypeCheck (exc , (PyTypeObject  * )PyExc_UnicodeEncodeError )) {
782-         if  (_PyUnicodeError_GetParams (exc , NULL , NULL ,
783-                                       & start , & end , & slen , false) <  0 ) {
784-             return  NULL ;
785-         }
786-         PyObject  * res  =  PyUnicode_New (slen , '?' );
787-         if  (res  ==  NULL ) {
788-             return  NULL ;
789-         }
790-         assert (PyUnicode_KIND (res ) ==  PyUnicode_1BYTE_KIND );
791-         Py_UCS1  * outp  =  PyUnicode_1BYTE_DATA (res );
792-         memset (outp , '?' , sizeof (Py_UCS1 ) *  slen );
793-         assert (_PyUnicode_CheckConsistency (res , 1 ));
794-         return  Py_BuildValue ("(Nn)" , res , end );
820+ 
821+ static  PyObject  * 
822+ _PyCodec_ReplaceUnicodeDecodeError (PyObject  * exc )
823+ {
824+     Py_ssize_t  end ;
825+     if  (PyUnicodeDecodeError_GetEnd (exc , & end ) <  0 ) {
826+         return  NULL ;
795827    }
796-     else  if  (PyObject_TypeCheck (exc , (PyTypeObject  * )PyExc_UnicodeDecodeError )) {
797-         if  (_PyUnicodeError_GetParams (exc , NULL , NULL ,
798-                                       NULL , & end , NULL , true) <  0 ) {
799-             return  NULL ;
800-         }
801-         return  Py_BuildValue ("(Cn)" ,
802-                              (int )Py_UNICODE_REPLACEMENT_CHARACTER ,
803-                              end );
828+     PyObject  * res  =  codec_handler_unicode_replacement_character (1 );
829+     if  (res  ==  NULL ) {
830+         return  NULL ;
804831    }
805-     else  if  (PyObject_TypeCheck (exc , (PyTypeObject  * )PyExc_UnicodeTranslateError )) {
806-         if  (_PyUnicodeError_GetParams (exc , NULL , NULL ,
807-                                       & start , & end , & slen , false) <  0 ) {
808-             return  NULL ;
809-         }
810-         PyObject  * res  =  PyUnicode_New (slen , Py_UNICODE_REPLACEMENT_CHARACTER );
811-         if  (res  ==  NULL ) {
812-             return  NULL ;
813-         }
814-         assert (slen  ==  0  ||  PyUnicode_KIND (res ) ==  PyUnicode_2BYTE_KIND );
815-         Py_UCS2  * outp  =  PyUnicode_2BYTE_DATA (res );
816-         for  (Py_ssize_t  i  =  0 ; i  <  slen ; ++ i ) {
817-             outp [i ] =  Py_UNICODE_REPLACEMENT_CHARACTER ;
818-         }
819-         assert (_PyUnicode_CheckConsistency (res , 1 ));
820-         return  Py_BuildValue ("(Nn)" , res , end );
832+     return  Py_BuildValue ("(Nn)" , res , end );
833+ }
834+ 
835+ 
836+ static  PyObject  * 
837+ _PyCodec_ReplaceUnicodeTranslateError (PyObject  * exc )
838+ {
839+     Py_ssize_t  start , end , slen ;
840+     if  (_PyUnicodeError_GetParams (exc , NULL , NULL ,
841+                                   & start , & end , & slen , false) <  0 )
842+     {
843+         return  NULL ;
844+     }
845+     PyObject  * res  =  codec_handler_unicode_replacement_character (slen );
846+     if  (res  ==  NULL ) {
847+         return  NULL ;
848+     }
849+     return  Py_BuildValue ("(Nn)" , res , end );
850+ }
851+ 
852+ 
853+ PyObject  * PyCodec_ReplaceErrors (PyObject  * exc )
854+ {
855+     if  (_PyIsUnicodeEncodeError (exc )) {
856+         return  _PyCodec_ReplaceUnicodeEncodeError (exc );
857+     }
858+     else  if  (_PyIsUnicodeDecodeError (exc )) {
859+         return  _PyCodec_ReplaceUnicodeDecodeError (exc );
860+     }
861+     else  if  (_PyIsUnicodeTranslateError (exc )) {
862+         return  _PyCodec_ReplaceUnicodeTranslateError (exc );
821863    }
822864    else  {
823865        wrong_exception_type (exc );
@@ -1468,7 +1510,8 @@ ignore_errors(PyObject *Py_UNUSED(self), PyObject *exc)
14681510}
14691511
14701512
1471- static  PyObject  * replace_errors (PyObject  * self , PyObject  * exc )
1513+ static  inline  PyObject  * 
1514+ replace_errors (PyObject  * Py_UNUSED (self ), PyObject  * exc )
14721515{
14731516    return  PyCodec_ReplaceErrors (exc );
14741517}
0 commit comments