-
-
Notifications
You must be signed in to change notification settings - Fork 33.2k
GH-123894: Simplify ascii decode using unaligned loads #123895
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -4973,58 +4973,32 @@ static Py_ssize_t | |
| ascii_decode(const char *start, const char *end, Py_UCS1 *dest) | ||
| { | ||
| const char *p = start; | ||
|
|
||
| #if SIZEOF_SIZE_T <= SIZEOF_VOID_P | ||
| if (_Py_IS_ALIGNED(p, ALIGNOF_SIZE_T) | ||
| && _Py_IS_ALIGNED(dest, ALIGNOF_SIZE_T)) | ||
| { | ||
| /* Fast path, see in STRINGLIB(utf8_decode) for | ||
| an explanation. */ | ||
| /* Help allocation */ | ||
| const char *_p = p; | ||
| Py_UCS1 * q = dest; | ||
| while (_p + SIZEOF_SIZE_T <= end) { | ||
| size_t value = *(const size_t *) _p; | ||
| if (value & ASCII_CHAR_MASK) | ||
| break; | ||
| *((size_t *)q) = value; | ||
| _p += SIZEOF_SIZE_T; | ||
| q += SIZEOF_SIZE_T; | ||
| } | ||
| p = _p; | ||
| while (p < end) { | ||
| if ((unsigned char)*p & 0x80) | ||
| break; | ||
| *q++ = *p++; | ||
| Py_UCS1 *q = dest; | ||
| const char *size_t_end = end - (SIZEOF_SIZE_T - 1); | ||
|
|
||
| while (p < size_t_end) { | ||
| /* Loading and storing a size_t using memcpy leads to unaligned loads | ||
| for platforms that can handle it. */ | ||
| const size_t *restrict _p = (const size_t *)p; | ||
| size_t *restrict _q = (size_t *)q; | ||
| size_t value; | ||
| memcpy(&value, _p, SIZEOF_SIZE_T); | ||
|
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. How is performance affected by those multiple memcpy() calls? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. memcpy() isn't actually called. A memcpy() with a given size gets optimized to a single load. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Ah, I see the ASM, yes. I'd suggest reformulating the comment by saying that "When supported, compilers optimize…
|
||
| if (value & ASCII_CHAR_MASK) { | ||
| break; | ||
| } | ||
| return p - start; | ||
| memcpy(_q, &value, SIZEOF_SIZE_T); | ||
| p += SIZEOF_SIZE_T; | ||
| q += SIZEOF_SIZE_T; | ||
| } | ||
| #endif | ||
| while (p < end) { | ||
| /* Fast path, see in STRINGLIB(utf8_decode) in stringlib/codecs.h | ||
| for an explanation. */ | ||
| if (_Py_IS_ALIGNED(p, ALIGNOF_SIZE_T)) { | ||
| /* Help allocation */ | ||
| const char *_p = p; | ||
| while (_p + SIZEOF_SIZE_T <= end) { | ||
| size_t value = *(const size_t *) _p; | ||
| if (value & ASCII_CHAR_MASK) | ||
| break; | ||
| _p += SIZEOF_SIZE_T; | ||
| } | ||
| p = _p; | ||
| if (_p == end) | ||
| break; | ||
| } | ||
| if ((unsigned char)*p & 0x80) | ||
| if ((unsigned char)*p & 0x80) { | ||
| break; | ||
| ++p; | ||
| } | ||
| *q++ = *p++; | ||
| } | ||
| memcpy(dest, start, p - start); | ||
| return p - start; | ||
| } | ||
|
|
||
|
|
||
| static int | ||
| unicode_decode_utf8_impl(_PyUnicodeWriter *writer, | ||
| const char *starts, const char *s, const char *end, | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Not sure if we already discussed it, but would
`_Py_ALIGN_DOWN(end, SIZEOF_SIZE_T)` work? There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Probably. Is using a macro here important?