Skip to content

Commit 713ece5

Browse files
committed
Get log10 only, fill buffer backwards
1 parent 7dfec2e commit 713ece5

File tree

1 file changed

+28
-50
lines changed

1 file changed

+28
-50
lines changed

Python/codecs.c

Lines changed: 28 additions & 50 deletions
Original file line numberDiff line numberDiff line change
@@ -730,54 +730,20 @@ codec_handler_write_unicode_hex(Py_UCS1 **p, Py_UCS4 ch)
730730
}
731731

732732

733-
static inline void
734-
codec_handler_unicode_log10_max(Py_UCS4 ch, int *base, int *digits)
735-
{
736-
#define MAKE_BRANCH(D, N) \
737-
do { \
738-
if (ch < 10 * (N)) { \
739-
if (base != NULL) { \
740-
*base = (N); \
741-
} \
742-
if (digits != NULL) { \
743-
*digits = (D); \
744-
} \
745-
return; \
746-
} \
747-
} while (0)
748-
MAKE_BRANCH(1, 1);
749-
MAKE_BRANCH(2, 10);
750-
MAKE_BRANCH(3, 100);
751-
MAKE_BRANCH(4, 1000);
752-
MAKE_BRANCH(5, 10000);
753-
MAKE_BRANCH(6, 100000);
754-
MAKE_BRANCH(7, 1000000);
755-
#undef MAKE_BRANCH
756-
Py_UNREACHABLE();
757-
}
758-
759-
760-
/*
761-
* Write the decimal representation of 'ch' to the buffer pointed by 'p'
762-
* using at most 7 characters prefixed by '&#' and suffixed by ';'.
733+
/* Determine the number of digits for a decimal representation of codepoint ch
763734
*/
764-
static inline void
765-
codec_handler_write_unicode_dec(Py_UCS1 **p, Py_UCS4 ch)
766-
{
767-
int base = 0, digits = 0;
768-
codec_handler_unicode_log10_max(ch, &base, &digits);
769-
assert(base != 0 && digits != 0);
770-
assert(digits <= 7);
771-
772-
*(*p)++ = '&';
773-
*(*p)++ = '#';
774-
while (digits-- > 0) {
775-
assert(base >= 1);
776-
*(*p)++ = '0' + ch / base;
777-
ch %= base;
778-
base /= 10;
779-
}
780-
*(*p)++ = ';';
735+
static inline int
736+
n_decimal_digits_for_codepoint(Py_UCS4 ch)
737+
{
738+
if (ch < 10) return 1;
739+
if (ch < 100) return 2;
740+
if (ch < 1000) return 3;
741+
if (ch < 10000) return 4;
742+
if (ch < 100000) return 5;
743+
if (ch < 1000000) return 6;
744+
if (ch < 10000000) return 7;
745+
// Unicode codepoints are limited to 1114111 (7 decimal digits)
746+
Py_UNREACHABLE();
781747
}
782748

783749
/*
@@ -951,8 +917,7 @@ PyObject *PyCodec_XMLCharRefReplaceErrors(PyObject *exc)
951917
for (Py_ssize_t i = start; i < end; ++i) {
952918
/* object is guaranteed to be "ready" */
953919
Py_UCS4 ch = PyUnicode_READ_CHAR(obj, i);
954-
int k = 0;
955-
codec_handler_unicode_log10_max(ch, NULL, &k);
920+
int k = n_decimal_digits_for_codepoint(ch);
956921
assert(k != 0);
957922
assert(k <= 7);
958923
ressize += 2 + k + 1;
@@ -968,7 +933,20 @@ PyObject *PyCodec_XMLCharRefReplaceErrors(PyObject *exc)
968933
/* generate replacement */
969934
for (Py_ssize_t i = start; i < end; ++i) {
970935
Py_UCS4 ch = PyUnicode_READ_CHAR(obj, i);
971-
codec_handler_write_unicode_dec(&outp, ch);
936+
/*
937+
* Write the decimal representation of 'ch' to the buffer pointed to by 'outp'
938+
* using at most 7 characters prefixed by '&#' and suffixed by ';'.
939+
*/
940+
*outp++ = '&';
941+
*outp++ = '#';
942+
Py_UCS1 *digit_end = outp + n_decimal_digits_for_codepoint(ch);
943+
for (Py_UCS1 *digitp = digit_end - 1; digitp >= outp; --digitp) {
944+
*digitp = '0' + (ch % 10);
945+
ch /= 10;
946+
}
947+
assert(ch == 0);
948+
outp = digit_end;
949+
*outp++ = ';';
972950
}
973951
assert(_PyUnicode_CheckConsistency(res, 1));
974952
PyObject *restuple = Py_BuildValue("(Nn)", res, end);

0 commit comments

Comments
 (0)