@@ -730,54 +730,20 @@ codec_handler_write_unicode_hex(Py_UCS1 **p, Py_UCS4 ch)
730730}
731731
732732
733- static inline void
734- codec_handler_unicode_log10_max (Py_UCS4 ch , int * base , int * digits )
735- {
736- #define MAKE_BRANCH (D , N ) \
737- do { \
738- if (ch < 10 * (N)) { \
739- if (base != NULL) { \
740- *base = (N); \
741- } \
742- if (digits != NULL) { \
743- *digits = (D); \
744- } \
745- return; \
746- } \
747- } while (0)
748- MAKE_BRANCH (1 , 1 );
749- MAKE_BRANCH (2 , 10 );
750- MAKE_BRANCH (3 , 100 );
751- MAKE_BRANCH (4 , 1000 );
752- MAKE_BRANCH (5 , 10000 );
753- MAKE_BRANCH (6 , 100000 );
754- MAKE_BRANCH (7 , 1000000 );
755- #undef MAKE_BRANCH
756- Py_UNREACHABLE ();
757- }
758-
759-
760- /*
761- * Write the decimal representation of 'ch' to the buffer pointed by 'p'
762- * using at most 7 characters prefixed by '&#' and suffixed by ';'.
733+ /* Determine the number of digits for a decimal representation of codepoint ch
763734 */
764- static inline void
765- codec_handler_write_unicode_dec (Py_UCS1 * * p , Py_UCS4 ch )
766- {
767- int base = 0 , digits = 0 ;
768- codec_handler_unicode_log10_max (ch , & base , & digits );
769- assert (base != 0 && digits != 0 );
770- assert (digits <= 7 );
771-
772- * (* p )++ = '&' ;
773- * (* p )++ = '#' ;
774- while (digits -- > 0 ) {
775- assert (base >= 1 );
776- * (* p )++ = '0' + ch / base ;
777- ch %= base ;
778- base /= 10 ;
779- }
780- * (* p )++ = ';' ;
735+ static inline int
736+ n_decimal_digits_for_codepoint (Py_UCS4 ch )
737+ {
738+ if (ch < 10 ) return 1 ;
739+ if (ch < 100 ) return 2 ;
740+ if (ch < 1000 ) return 3 ;
741+ if (ch < 10000 ) return 4 ;
742+ if (ch < 100000 ) return 5 ;
743+ if (ch < 1000000 ) return 6 ;
744+ if (ch < 10000000 ) return 7 ;
745+ // Unicode codepoints are limited to 1114111 (7 decimal digits)
746+ Py_UNREACHABLE ();
781747}
782748
783749/*
@@ -951,8 +917,7 @@ PyObject *PyCodec_XMLCharRefReplaceErrors(PyObject *exc)
951917 for (Py_ssize_t i = start ; i < end ; ++ i ) {
952918 /* object is guaranteed to be "ready" */
953919 Py_UCS4 ch = PyUnicode_READ_CHAR (obj , i );
954- int k = 0 ;
955- codec_handler_unicode_log10_max (ch , NULL , & k );
920+ int k = n_decimal_digits_for_codepoint (ch );
956921 assert (k != 0 );
957922 assert (k <= 7 );
958923 ressize += 2 + k + 1 ;
@@ -968,7 +933,20 @@ PyObject *PyCodec_XMLCharRefReplaceErrors(PyObject *exc)
968933 /* generate replacement */
969934 for (Py_ssize_t i = start ; i < end ; ++ i ) {
970935 Py_UCS4 ch = PyUnicode_READ_CHAR (obj , i );
971- codec_handler_write_unicode_dec (& outp , ch );
936+ /*
937+ * Write the decimal representation of 'ch' to the buffer pointed to by 'outp'
938+ * using at most 7 decimal digits, prefixed by '&#' and suffixed by ';'.
939+ */
940+ * outp ++ = '&' ;
941+ * outp ++ = '#' ;
942+ Py_UCS1 * digit_end = outp + n_decimal_digits_for_codepoint (ch );
943+ for (Py_UCS1 * digitp = digit_end - 1 ; digitp >= outp ; -- digitp ) {
944+ * digitp = '0' + (ch % 10 );
945+ ch /= 10 ;
946+ }
947+ assert (ch == 0 );
948+ outp = digit_end ;
949+ * outp ++ = ';' ;
972950 }
973951 assert (_PyUnicode_CheckConsistency (res , 1 ));
974952 PyObject * restuple = Py_BuildValue ("(Nn)" , res , end );
0 commit comments