Skip to content

Commit 0c9d5ad

Browse files
committed
Use new helpers in the xmlcharrefreplace handler.
1 parent d5796e6 commit 0c9d5ad

File tree

1 file changed

+62
-63
lines changed

1 file changed

+62
-63
lines changed

Python/codecs.c

Lines changed: 62 additions & 63 deletions
Original file line numberDiff line numberDiff line change
@@ -730,6 +730,56 @@ codec_handler_write_unicode_hex(Py_UCS1 **p, Py_UCS4 ch)
730730
}
731731

732732

733+
static inline void
734+
codec_handler_unicode_log10_max(Py_UCS4 ch, uint64_t *base, uint64_t *digits)
735+
{
736+
#define MAKE_BRANCH(D, N) \
737+
do { \
738+
if (ch < (N)) { \
739+
if (base != NULL) { \
740+
*base = (N); \
741+
} \
742+
if (digits != NULL) { \
743+
*digits = (D); \
744+
} \
745+
} \
746+
} while (0)
747+
MAKE_BRANCH(1, 10);
748+
MAKE_BRANCH(2, 100);
749+
MAKE_BRANCH(3, 1000);
750+
MAKE_BRANCH(4, 10000);
751+
MAKE_BRANCH(5, 100000);
752+
MAKE_BRANCH(6, 1000000);
753+
MAKE_BRANCH(7, 10000000);
754+
#undef MAKE_BRANCH
755+
Py_UNREACHABLE();
756+
}
757+
758+
759+
/*
760+
* Write the decimal representation of 'ch' to the buffer pointed by 'p'
761+
* using at most 7 characters prefixed by '&#' and suffixed by ';'.
762+
*/
763+
static inline void
764+
codec_handler_write_unicode_dec(Py_UCS1 **p, Py_UCS4 ch)
765+
{
766+
uint64_t base = 0, digits = 0;
767+
codec_handler_unicode_log10_max(ch, &base, &digits);
768+
assert(base != 0 && digits != 0);
769+
assert(digits <= 7);
770+
771+
*(*p)++ = '&';
772+
*(*p)++ = '#';
773+
while (digits-- > 0) {
774+
assert(base >= 1);
775+
*(*p)++ = '0' + ch / base;
776+
ch %= base;
777+
base /= 10;
778+
}
779+
*(*p)++ = ';';
780+
}
781+
782+
733783
// --- handler: 'strict' ------------------------------------------------------
734784

735785
PyObject *PyCodec_StrictErrors(PyObject *exc)
@@ -825,9 +875,12 @@ PyObject *PyCodec_ReplaceErrors(PyObject *exc)
825875
}
826876
}
827877

878+
879+
// --- handler: 'xmlcharrefreplace' -------------------------------------------
880+
828881
PyObject *PyCodec_XMLCharRefReplaceErrors(PyObject *exc)
829882
{
830-
if (!PyObject_TypeCheck(exc, (PyTypeObject *)PyExc_UnicodeEncodeError)) {
883+
if (!_PyIsUnicodeEncodeError(exc)) {
831884
wrong_exception_type(exc);
832885
return NULL;
833886
}
@@ -856,28 +909,11 @@ PyObject *PyCodec_XMLCharRefReplaceErrors(PyObject *exc)
856909
for (Py_ssize_t i = start; i < end; ++i) {
857910
/* object is guaranteed to be "ready" */
858911
Py_UCS4 ch = PyUnicode_READ_CHAR(obj, i);
859-
if (ch < 10) {
860-
ressize += 2 + 1 + 1;
861-
}
862-
else if (ch < 100) {
863-
ressize += 2 + 2 + 1;
864-
}
865-
else if (ch < 1000) {
866-
ressize += 2 + 3 + 1;
867-
}
868-
else if (ch < 10000) {
869-
ressize += 2 + 4 + 1;
870-
}
871-
else if (ch < 100000) {
872-
ressize += 2 + 5 + 1;
873-
}
874-
else if (ch < 1000000) {
875-
ressize += 2 + 6 + 1;
876-
}
877-
else {
878-
assert(ch < 10000000);
879-
ressize += 2 + 7 + 1;
880-
}
912+
uint64_t k = 0;
913+
codec_handler_unicode_log10_max(ch, NULL, &k);
914+
assert(k != 0);
915+
assert(k <= 7);
916+
ressize += 2 + k + 1;
881917
}
882918

883919
/* allocate replacement */
@@ -889,46 +925,8 @@ PyObject *PyCodec_XMLCharRefReplaceErrors(PyObject *exc)
889925
Py_UCS1 *outp = PyUnicode_1BYTE_DATA(res);
890926
/* generate replacement */
891927
for (Py_ssize_t i = start; i < end; ++i) {
892-
int digits, base;
893928
Py_UCS4 ch = PyUnicode_READ_CHAR(obj, i);
894-
if (ch < 10) {
895-
digits = 1;
896-
base = 1;
897-
}
898-
else if (ch < 100) {
899-
digits = 2;
900-
base = 10;
901-
}
902-
else if (ch < 1000) {
903-
digits = 3;
904-
base = 100;
905-
}
906-
else if (ch < 10000) {
907-
digits = 4;
908-
base = 1000;
909-
}
910-
else if (ch < 100000) {
911-
digits = 5;
912-
base = 10000;
913-
}
914-
else if (ch < 1000000) {
915-
digits = 6;
916-
base = 100000;
917-
}
918-
else {
919-
assert(ch < 10000000);
920-
digits = 7;
921-
base = 1000000;
922-
}
923-
*outp++ = '&';
924-
*outp++ = '#';
925-
while (digits-- > 0) {
926-
assert(base >= 1);
927-
*outp++ = '0' + ch / base;
928-
ch %= base;
929-
base /= 10;
930-
}
931-
*outp++ = ';';
929+
codec_handler_write_unicode_dec(&outp, ch);
932930
}
933931
assert(_PyUnicode_CheckConsistency(res, 1));
934932
PyObject *restuple = Py_BuildValue("(Nn)", res, end);
@@ -1419,7 +1417,8 @@ static PyObject *replace_errors(PyObject *self, PyObject *exc)
14191417
}
14201418

14211419

1422-
static PyObject *xmlcharrefreplace_errors(PyObject *self, PyObject *exc)
1420+
/*
 * Adapter with a (self, exc) signature: 'self' is ignored and 'exc' is
 * forwarded unchanged to PyCodec_XMLCharRefReplaceErrors(), whose result
 * is returned as-is.
 */
static inline PyObject *
xmlcharrefreplace_errors(PyObject *Py_UNUSED(self), PyObject *exc)
{
    PyObject *result = PyCodec_XMLCharRefReplaceErrors(exc);
    return result;
}

0 commit comments

Comments
 (0)