@@ -730,6 +730,56 @@ codec_handler_write_unicode_hex(Py_UCS1 **p, Py_UCS4 ch)
730730}
731731
732732
733+ static  inline  void 
734+ codec_handler_unicode_log10_max (Py_UCS4  ch , uint64_t  * base , uint64_t  * digits )
735+ {
736+ #define  MAKE_BRANCH (D , N )           \
737+     do {                            \
738+         if (ch < (N)) {             \
739+             if (base != NULL) {     \
740+                 *base = (N);        \
741+             }                       \
742+             if (digits != NULL) {   \
743+                 *digits = (D);      \
744+             }                       \
745+         }                           \
746+     } while (0)
747+     MAKE_BRANCH (1 , 10 );
748+     MAKE_BRANCH (2 , 100 );
749+     MAKE_BRANCH (3 , 1000 );
750+     MAKE_BRANCH (4 , 10000 );
751+     MAKE_BRANCH (5 , 100000 );
752+     MAKE_BRANCH (6 , 1000000 );
753+     MAKE_BRANCH (7 , 10000000 );
754+ #undef  MAKE_BRANCH
755+     Py_UNREACHABLE ();
756+ }
757+ 
758+ 
759+ /* 
760+  * Write the decimal representation of 'ch' to the buffer pointed by 'p' 
761+  * using at most 7 characters prefixed by '&#' and suffixed by ';'. 
762+  */ 
763+ static  inline  void 
764+ codec_handler_write_unicode_dec (Py_UCS1  * * p , Py_UCS4  ch )
765+ {
766+     uint64_t  base  =  0 , digits  =  0 ;
767+     codec_handler_unicode_log10_max (ch , & base , & digits );
768+     assert (base  !=  0  &&  digits  !=  0 );
769+     assert (digits  <= 7 );
770+ 
771+     * (* p )++  =  '&' ;
772+     * (* p )++  =  '#' ;
773+     while  (digits --  >  0 ) {
774+         assert (base  >= 1 );
775+         * (* p )++  =  '0'  +  ch  / base ;
776+         ch  %= base ;
777+         base  /= 10 ;
778+     }
779+     * (* p )++  =  ';' ;
780+ }
781+ 
782+ 
733783// --- handler: 'strict' ------------------------------------------------------ 
734784
735785PyObject  * PyCodec_StrictErrors (PyObject  * exc )
@@ -825,9 +875,12 @@ PyObject *PyCodec_ReplaceErrors(PyObject *exc)
825875    }
826876}
827877
878+ 
879+ // --- handler: 'xmlcharrefreplace' ------------------------------------------- 
880+ 
828881PyObject  * PyCodec_XMLCharRefReplaceErrors (PyObject  * exc )
829882{
830-     if  (!PyObject_TypeCheck (exc , ( PyTypeObject   * ) PyExc_UnicodeEncodeError )) {
883+     if  (!_PyIsUnicodeEncodeError (exc )) {
831884        wrong_exception_type (exc );
832885        return  NULL ;
833886    }
@@ -856,28 +909,11 @@ PyObject *PyCodec_XMLCharRefReplaceErrors(PyObject *exc)
856909    for  (Py_ssize_t  i  =  start ; i  <  end ; ++ i ) {
857910        /* object is guaranteed to be "ready" */ 
858911        Py_UCS4  ch  =  PyUnicode_READ_CHAR (obj , i );
859-         if  (ch  <  10 ) {
860-             ressize  +=  2  +  1  +  1 ;
861-         }
862-         else  if  (ch  <  100 ) {
863-             ressize  +=  2  +  2  +  1 ;
864-         }
865-         else  if  (ch  <  1000 ) {
866-             ressize  +=  2  +  3  +  1 ;
867-         }
868-         else  if  (ch  <  10000 ) {
869-             ressize  +=  2  +  4  +  1 ;
870-         }
871-         else  if  (ch  <  100000 ) {
872-             ressize  +=  2  +  5  +  1 ;
873-         }
874-         else  if  (ch  <  1000000 ) {
875-             ressize  +=  2  +  6  +  1 ;
876-         }
877-         else  {
878-             assert (ch  <  10000000 );
879-             ressize  +=  2  +  7  +  1 ;
880-         }
912+         uint64_t  k  =  0 ;
913+         codec_handler_unicode_log10_max (ch , NULL , & k );
914+         assert (k  !=  0 );
915+         assert (k  <= 7 );
916+         ressize  +=  2  +  k  +  1 ;
881917    }
882918
883919    /* allocate replacement */ 
@@ -889,46 +925,8 @@ PyObject *PyCodec_XMLCharRefReplaceErrors(PyObject *exc)
889925    Py_UCS1  * outp  =  PyUnicode_1BYTE_DATA (res );
890926    /* generate replacement */ 
891927    for  (Py_ssize_t  i  =  start ; i  <  end ; ++ i ) {
892-         int  digits , base ;
893928        Py_UCS4  ch  =  PyUnicode_READ_CHAR (obj , i );
894-         if  (ch  <  10 ) {
895-             digits  =  1 ;
896-             base  =  1 ;
897-         }
898-         else  if  (ch  <  100 ) {
899-             digits  =  2 ;
900-             base  =  10 ;
901-         }
902-         else  if  (ch  <  1000 ) {
903-             digits  =  3 ;
904-             base  =  100 ;
905-         }
906-         else  if  (ch  <  10000 ) {
907-             digits  =  4 ;
908-             base  =  1000 ;
909-         }
910-         else  if  (ch  <  100000 ) {
911-             digits  =  5 ;
912-             base  =  10000 ;
913-         }
914-         else  if  (ch  <  1000000 ) {
915-             digits  =  6 ;
916-             base  =  100000 ;
917-         }
918-         else  {
919-             assert (ch  <  10000000 );
920-             digits  =  7 ;
921-             base  =  1000000 ;
922-         }
923-         * outp ++  =  '&' ;
924-         * outp ++  =  '#' ;
925-         while  (digits --  >  0 ) {
926-             assert (base  >= 1 );
927-             * outp ++  =  '0'  +  ch  / base ;
928-             ch  %= base ;
929-             base  /= 10 ;
930-         }
931-         * outp ++  =  ';' ;
929+         codec_handler_write_unicode_dec (& outp , ch );
932930    }
933931    assert (_PyUnicode_CheckConsistency (res , 1 ));
934932    PyObject  * restuple  =  Py_BuildValue ("(Nn)" , res , end );
@@ -1419,7 +1417,8 @@ static PyObject *replace_errors(PyObject *self, PyObject *exc)
14191417}
14201418
14211419
1422- static  PyObject  * xmlcharrefreplace_errors (PyObject  * self , PyObject  * exc )
1420+ static  inline  PyObject  * 
1421+ xmlcharrefreplace_errors (PyObject  * Py_UNUSED (self ), PyObject  * exc )
14231422{
14241423    return  PyCodec_XMLCharRefReplaceErrors (exc );
14251424}
0 commit comments