diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index 2494c989544ca0..12826db09d4543 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -4973,58 +4973,32 @@ static Py_ssize_t ascii_decode(const char *start, const char *end, Py_UCS1 *dest) { const char *p = start; - -#if SIZEOF_SIZE_T <= SIZEOF_VOID_P - if (_Py_IS_ALIGNED(p, ALIGNOF_SIZE_T) - && _Py_IS_ALIGNED(dest, ALIGNOF_SIZE_T)) - { - /* Fast path, see in STRINGLIB(utf8_decode) for - an explanation. */ - /* Help allocation */ - const char *_p = p; - Py_UCS1 * q = dest; - while (_p + SIZEOF_SIZE_T <= end) { - size_t value = *(const size_t *) _p; - if (value & ASCII_CHAR_MASK) - break; - *((size_t *)q) = value; - _p += SIZEOF_SIZE_T; - q += SIZEOF_SIZE_T; - } - p = _p; - while (p < end) { - if ((unsigned char)*p & 0x80) - break; - *q++ = *p++; + Py_UCS1 *q = dest; + const char *size_t_end = end - (SIZEOF_SIZE_T - 1); + + while (p < size_t_end) { + /* Loading and storing a size_t using memcpy leads to unaligned loads + for platforms that can handle it. */ + const size_t *restrict _p = (const size_t *)p; + size_t *restrict _q = (size_t *)q; + size_t value; + memcpy(&value, _p, SIZEOF_SIZE_T); + if (value & ASCII_CHAR_MASK) { + break; } - return p - start; + memcpy(_q, &value, SIZEOF_SIZE_T); + p += SIZEOF_SIZE_T; + q += SIZEOF_SIZE_T; } -#endif while (p < end) { - /* Fast path, see in STRINGLIB(utf8_decode) in stringlib/codecs.h - for an explanation. */ - if (_Py_IS_ALIGNED(p, ALIGNOF_SIZE_T)) { - /* Help allocation */ - const char *_p = p; - while (_p + SIZEOF_SIZE_T <= end) { - size_t value = *(const size_t *) _p; - if (value & ASCII_CHAR_MASK) - break; - _p += SIZEOF_SIZE_T; - } - p = _p; - if (_p == end) - break; - } - if ((unsigned char)*p & 0x80) + if ((unsigned char)*p & 0x80) { break; - ++p; + } + *q++ = *p++; } - memcpy(dest, start, p - start); return p - start; } - static int unicode_decode_utf8_impl(_PyUnicodeWriter *writer, const char *starts, const char *s, const char *end,