Skip to content

Commit f563b42

Browse files
committed
micro optimization for x86
1 parent e3adab4 commit f563b42

File tree

1 file changed

+12
-4
lines changed

1 file changed

+12
-4
lines changed

Objects/unicodeobject.c

Lines changed: 12 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -5068,12 +5068,19 @@ find_first_nonascii(const unsigned char *start, const unsigned char *end)
50685068

50695069
if (end - start >= SIZEOF_SIZE_T) {
50705070
const unsigned char *p2 = _Py_ALIGN_UP(p, SIZEOF_SIZE_T);
5071+
if (p < p2) {
50715072
#if HAVE_CTZ
5072-
size_t u = load_unaligned(p, p2 - p) & ASCII_CHAR_MASK;
5073-
if (u) {
5074-
return p - start + (ctz(u) - 7) / 8;
5073+
#if defined(_M_AMD64) || defined(_M_IX86) || defined(__x86_64__) || defined(__i386__)
5074+
// x86 and amd64 are little-endian and support unaligned loads.
5075+
size_t u = *(const size_t*)p & ASCII_CHAR_MASK;
5076+
#else
5077+
size_t u = load_unaligned(p, p2 - p) & ASCII_CHAR_MASK;
5078+
#endif
5079+
if (u) {
5080+
return p - start + (ctz(u) - 7) / 8;
5081+
}
5082+
p = p2;
50755083
}
5076-
p = p2;
50775084
#else
50785085
while (p < p2) {
50795086
if (*p & 0x80) {
@@ -5098,6 +5105,7 @@ find_first_nonascii(const unsigned char *start, const unsigned char *end)
50985105
}
50995106
}
51005107
#if HAVE_CTZ
5108+
// We cannot use *(const size_t*)p here: a full-word load could read past `end` (buffer overrun).
51015109
size_t u = load_unaligned(p, end - p) & ASCII_CHAR_MASK;
51025110
if (u) {
51035111
return p - start + (ctz(u) - 7) / 8;

0 commit comments

Comments
 (0)