@@ -780,6 +780,26 @@ codec_handler_write_unicode_dec(Py_UCS1 **p, Py_UCS4 ch)
780780    * (* p )++  =  ';' ;
781781}
782782
783+ /* 
784+  * Create a Unicode string containing 'count' copies of the official 
785+  * Unicode REPLACEMENT CHARACTER (0xFFFD). 
786+  */ 
787+ static  PyObject  * 
788+ codec_handler_unicode_replacement_character (Py_ssize_t  count )
789+ {
790+     PyObject  * res  =  PyUnicode_New (count , Py_UNICODE_REPLACEMENT_CHARACTER );
791+     if  (res  ==  NULL ) {
792+         return  NULL ;
793+     }
794+     assert (count  ==  0  ||  PyUnicode_KIND (res ) ==  PyUnicode_2BYTE_KIND );
795+     Py_UCS2  * outp  =  PyUnicode_2BYTE_DATA (res );
796+     for  (Py_ssize_t  i  =  0 ; i  <  count ; ++ i ) {
797+         outp [i ] =  Py_UNICODE_REPLACEMENT_CHARACTER ;
798+     }
799+     assert (_PyUnicode_CheckConsistency (res , 1 ));
800+     return  res ;
801+ }
802+ 
783803
784804// --- handler: 'strict' ------------------------------------------------------ 
785805
@@ -825,50 +845,71 @@ PyObject *PyCodec_IgnoreErrors(PyObject *exc)
825845}
826846
827847
828- PyObject  * PyCodec_ReplaceErrors (PyObject  * exc )
848+ // --- handler: 'replace' ----------------------------------------------------- 
849+ 
850+ static  PyObject  * 
851+ _PyCodec_ReplaceUnicodeEncodeError (PyObject  * exc )
829852{
830853    Py_ssize_t  start , end , slen ;
854+     if  (_PyUnicodeError_GetParams (exc , NULL , NULL ,
855+                                   & start , & end , & slen , false) <  0 )
856+     {
857+         return  NULL ;
858+     }
859+     PyObject  * res  =  PyUnicode_New (slen , '?' );
860+     if  (res  ==  NULL ) {
861+         return  NULL ;
862+     }
863+     assert (PyUnicode_KIND (res ) ==  PyUnicode_1BYTE_KIND );
864+     Py_UCS1  * outp  =  PyUnicode_1BYTE_DATA (res );
865+     memset (outp , '?' , sizeof (Py_UCS1 ) *  slen );
866+     assert (_PyUnicode_CheckConsistency (res , 1 ));
867+     return  Py_BuildValue ("(Nn)" , res , end );
868+ }
831869
832-     if  (PyObject_TypeCheck (exc , (PyTypeObject  * )PyExc_UnicodeEncodeError )) {
833-         if  (_PyUnicodeError_GetParams (exc , NULL , NULL ,
834-                                       & start , & end , & slen , false) <  0 ) {
835-             return  NULL ;
836-         }
837-         PyObject  * res  =  PyUnicode_New (slen , '?' );
838-         if  (res  ==  NULL ) {
839-             return  NULL ;
840-         }
841-         assert (PyUnicode_KIND (res ) ==  PyUnicode_1BYTE_KIND );
842-         Py_UCS1  * outp  =  PyUnicode_1BYTE_DATA (res );
843-         memset (outp , '?' , sizeof (Py_UCS1 ) *  slen );
844-         assert (_PyUnicode_CheckConsistency (res , 1 ));
845-         return  Py_BuildValue ("(Nn)" , res , end );
870+ 
/*
 * 'replace' handling for a UnicodeDecodeError.
 *
 * Reads the end position with PyUnicodeDecodeError_GetEnd(), obtains a
 * string from codec_handler_unicode_replacement_character(1), and returns
 * both as Py_BuildValue("(Nn)", ...). Returns NULL if either step fails.
 */
871+ static  PyObject  * 
872+ _PyCodec_ReplaceUnicodeDecodeError (PyObject  * exc )
873+ {
874+     Py_ssize_t  end ;
875+     if  (PyUnicodeDecodeError_GetEnd (exc , & end ) <  0 ) {
876+         return  NULL ;
846877    }
847-     else  if  (PyObject_TypeCheck (exc , (PyTypeObject  * )PyExc_UnicodeDecodeError )) {
848-         if  (_PyUnicodeError_GetParams (exc , NULL , NULL ,
849-                                       NULL , & end , NULL , true) <  0 ) {
850-             return  NULL ;
851-         }
852-         return  Py_BuildValue ("(Cn)" ,
853-                              (int )Py_UNICODE_REPLACEMENT_CHARACTER ,
854-                              end );
878+     PyObject  * res  =  codec_handler_unicode_replacement_character (1 );
879+     if  (res  ==  NULL ) {
880+         return  NULL ;
855881    }
856-     else  if  (PyObject_TypeCheck (exc , (PyTypeObject  * )PyExc_UnicodeTranslateError )) {
857-         if  (_PyUnicodeError_GetParams (exc , NULL , NULL ,
858-                                       & start , & end , & slen , false) <  0 ) {
859-             return  NULL ;
860-         }
861-         PyObject  * res  =  PyUnicode_New (slen , Py_UNICODE_REPLACEMENT_CHARACTER );
862-         if  (res  ==  NULL ) {
863-             return  NULL ;
864-         }
865-         assert (slen  ==  0  ||  PyUnicode_KIND (res ) ==  PyUnicode_2BYTE_KIND );
866-         Py_UCS2  * outp  =  PyUnicode_2BYTE_DATA (res );
867-         for  (Py_ssize_t  i  =  0 ; i  <  slen ; ++ i ) {
868-             outp [i ] =  Py_UNICODE_REPLACEMENT_CHARACTER ;
869-         }
870-         assert (_PyUnicode_CheckConsistency (res , 1 ));
871-         return  Py_BuildValue ("(Nn)" , res , end );
882+     return  Py_BuildValue ("(Nn)" , res , end );
883+ }
884+ 
885+ 
886+ static  PyObject  * 
887+ _PyCodec_ReplaceUnicodeTranslateError (PyObject  * exc )
888+ {
889+     Py_ssize_t  start , end , slen ;
890+     if  (_PyUnicodeError_GetParams (exc , NULL , NULL ,
891+                                   & start , & end , & slen , false) <  0 )
892+     {
893+         return  NULL ;
894+     }
895+     PyObject  * res  =  codec_handler_unicode_replacement_character (slen );
896+     if  (res  ==  NULL ) {
897+         return  NULL ;
898+     }
899+     return  Py_BuildValue ("(Nn)" , res , end );
900+ }
901+ 
902+ 
903+ PyObject  * PyCodec_ReplaceErrors (PyObject  * exc )
904+ {
905+     if  (_PyIsUnicodeEncodeError (exc )) {
906+         return  _PyCodec_ReplaceUnicodeEncodeError (exc );
907+     }
908+     else  if  (_PyIsUnicodeDecodeError (exc )) {
909+         return  _PyCodec_ReplaceUnicodeDecodeError (exc );
910+     }
911+     else  if  (_PyIsUnicodeTranslateError (exc )) {
912+         return  _PyCodec_ReplaceUnicodeTranslateError (exc );
872913    }
873914    else  {
874915        wrong_exception_type (exc );
@@ -1467,7 +1508,8 @@ ignore_errors(PyObject *Py_UNUSED(self), PyObject *exc)
14671508}
14681509
14691510
1470- static  PyObject  * replace_errors (PyObject  * self , PyObject  * exc )
1511+ static  inline  PyObject  * 
1512+ replace_errors (PyObject  * Py_UNUSED (self ), PyObject  * exc )
14711513{
14721514    return  PyCodec_ReplaceErrors (exc );
14731515}
0 commit comments