From 947fcfa83c543281c0da33fab742fb45846a2f46 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?=
 <10796600+picnixz@users.noreply.github.com>
Date: Tue, 3 Dec 2024 16:33:07 +0100
Subject: [PATCH 1/5] fix UBSan failure in `unicodeobject.c`

---
 Objects/unicodeobject.c | 21 ++++++++++++++++++---
 1 file changed, 18 insertions(+), 3 deletions(-)

diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c
index 463da06445984b..03103b3da0401b 100644
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -5018,7 +5018,8 @@ ctz(size_t v)
 #define HAVE_CTZ 0
 #endif
 
-#if HAVE_CTZ && PY_LITTLE_ENDIAN
+#if PY_LITTLE_ENDIAN
+#if HAVE_CTZ
 // load p[0]..p[size-1] as a size_t without unaligned access nor read ahead.
 static size_t
 load_unaligned(const unsigned char *p, size_t size)
@@ -5065,6 +5066,21 @@ load_unaligned(const unsigned char *p, size_t size)
     return u.s;
 }
 #endif
+#  if defined(_M_AMD64) || defined(_M_IX86) || defined(__x86_64__) || defined(__i386__)
+// x86 and amd64 are little endian and can load unaligned memory.
+#    if defined(__clang__) && defined(__has_feature)    \
+        && __has_feature(undefined_behavior_sanitizer)
+static inline size_t
+__attribute__((no_sanitize("alignment")))
+load_unaligned_x86_amd64(const unsigned char *p)
+{
+    return *(const size_t *)p;
+}
+#    else
+#      define load_unaligned_x86_amd64(p)   *(const size_t *)p
+#    endif
+#endif
+#endif // PY_LITTLE_ENDIAN
 
 /*
  * Find the first non-ASCII character in a byte sequence.
@@ -5084,8 +5100,7 @@ find_first_nonascii(const unsigned char *start, const unsigned char *end)
 #if PY_LITTLE_ENDIAN && HAVE_CTZ
         if (p < p2) {
 #if defined(_M_AMD64) || defined(_M_IX86) || defined(__x86_64__) || defined(__i386__)
-            // x86 and amd64 are little endian and can load unaligned memory.
-            size_t u = *(const size_t*)p & ASCII_CHAR_MASK;
+            size_t u = load_unaligned_x86_amd64(p) & ASCII_CHAR_MASK;
 #else
             size_t u = load_unaligned(p, p2 - p) & ASCII_CHAR_MASK;
 #endif

From 8d8fe33ce07e52b42f57c13f57ba8b34ec084fc6 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?=
 <10796600+picnixz@users.noreply.github.com>
Date: Tue, 3 Dec 2024 16:55:57 +0100
Subject: [PATCH 2/5] try to fix compilation: nest the `__has_feature` check in its own `#if`

---
 Objects/unicodeobject.c | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c
index 03103b3da0401b..5163e5c4305ec4 100644
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -5068,14 +5068,17 @@ load_unaligned(const unsigned char *p, size_t size)
 #endif
 #  if defined(_M_AMD64) || defined(_M_IX86) || defined(__x86_64__) || defined(__i386__)
 // x86 and amd64 are little endian and can load unaligned memory.
-#    if defined(__clang__) && defined(__has_feature)    \
-        && __has_feature(undefined_behavior_sanitizer)
+#    if defined(__clang__) && defined(__has_feature)
+#      if __has_feature(undefined_behavior_sanitizer)
 static inline size_t
 __attribute__((no_sanitize("alignment")))
 load_unaligned_x86_amd64(const unsigned char *p)
 {
     return *(const size_t *)p;
 }
+#      else
+#        define load_unaligned_x86_amd64(p)   *(const size_t *)p
+#      endif
 #    else
 #      define load_unaligned_x86_amd64(p)   *(const size_t *)p
 #    endif

From a807b63b3846a1eb3e6feaae689cab40d24d0212 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?=
 <10796600+picnixz@users.noreply.github.com>
Date: Tue, 3 Dec 2024 17:01:52 +0100
Subject: [PATCH 3/5] simplify if-guards

---
 Objects/unicodeobject.c | 12 ++++--------
 1 file changed, 4 insertions(+), 8 deletions(-)

diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c
index 5163e5c4305ec4..fbe54a4052c292 100644
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -5019,7 +5019,7 @@ ctz(size_t v)
 #endif
 
 #if PY_LITTLE_ENDIAN
-#if HAVE_CTZ
+#  if HAVE_CTZ
 // load p[0]..p[size-1] as a size_t without unaligned access nor read ahead.
 static size_t
 load_unaligned(const unsigned char *p, size_t size)
@@ -5065,24 +5065,20 @@ load_unaligned(const unsigned char *p, size_t size)
     }
     return u.s;
 }
-#endif
+#  endif
 #  if defined(_M_AMD64) || defined(_M_IX86) || defined(__x86_64__) || defined(__i386__)
 // x86 and amd64 are little endian and can load unaligned memory.
-#    if defined(__clang__) && defined(__has_feature)
-#      if __has_feature(undefined_behavior_sanitizer)
+#    if defined(__clang__)
 static inline size_t
 __attribute__((no_sanitize("alignment")))
 load_unaligned_x86_amd64(const unsigned char *p)
 {
     return *(const size_t *)p;
 }
-#      else
-#        define load_unaligned_x86_amd64(p)   *(const size_t *)p
-#      endif
 #    else
 #      define load_unaligned_x86_amd64(p)   *(const size_t *)p
 #    endif
-#endif
+#  endif
 #endif // PY_LITTLE_ENDIAN
 
 /*

From c6af019d28a245e629d844d56263a06d3f222dc4 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?=
 <10796600+picnixz@users.noreply.github.com>
Date: Thu, 5 Dec 2024 15:07:15 +0100
Subject: [PATCH 4/5] use memcpy() instead of `no_sanitize` features

---
 Objects/unicodeobject.c | 26 +++++++-------------------
 1 file changed, 7 insertions(+), 19 deletions(-)

diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c
index fbe54a4052c292..d2b4d9a658a6a9 100644
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -5018,8 +5018,7 @@ ctz(size_t v)
 #define HAVE_CTZ 0
 #endif
 
-#if PY_LITTLE_ENDIAN
-#  if HAVE_CTZ
+#if HAVE_CTZ && PY_LITTLE_ENDIAN
 // load p[0]..p[size-1] as a size_t without unaligned access nor read ahead.
 static size_t
 load_unaligned(const unsigned char *p, size_t size)
@@ -5065,21 +5064,7 @@ load_unaligned(const unsigned char *p, size_t size)
     }
     return u.s;
 }
-#  endif
-#  if defined(_M_AMD64) || defined(_M_IX86) || defined(__x86_64__) || defined(__i386__)
-// x86 and amd64 are little endian and can load unaligned memory.
-#    if defined(__clang__)
-static inline size_t
-__attribute__((no_sanitize("alignment")))
-load_unaligned_x86_amd64(const unsigned char *p)
-{
-    return *(const size_t *)p;
-}
-#    else
-#      define load_unaligned_x86_amd64(p)   *(const size_t *)p
-#    endif
-#  endif
-#endif // PY_LITTLE_ENDIAN
+#endif
 
 /*
  * Find the first non-ASCII character in a byte sequence.
@@ -5098,10 +5083,13 @@ find_first_nonascii(const unsigned char *start, const unsigned char *end)
         const unsigned char *p2 = _Py_ALIGN_UP(p, SIZEOF_SIZE_T);
 #if PY_LITTLE_ENDIAN && HAVE_CTZ
         if (p < p2) {
+            size_t u;
 #if defined(_M_AMD64) || defined(_M_IX86) || defined(__x86_64__) || defined(__i386__)
-            size_t u = load_unaligned_x86_amd64(p) & ASCII_CHAR_MASK;
+            // x86 and amd64 are little endian and can load unaligned memory.
+            memcpy(&u, p, sizeof(size_t));
+            u &= ASCII_CHAR_MASK;
 #else
-            size_t u = load_unaligned(p, p2 - p) & ASCII_CHAR_MASK;
+            u = load_unaligned(p, p2 - p) & ASCII_CHAR_MASK;
 #endif
             if (u) {
                 return (ctz(u) - 7) / 8;

From 5b957a8f70050f51797eb64342cd3c4faa43e28a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?=
 <10796600+picnixz@users.noreply.github.com>
Date: Thu, 5 Dec 2024 16:31:49 +0100
Subject: [PATCH 5/5] Use `memcpy` on non-x86/amd64 little-endian platforms.

---
 Objects/unicodeobject.c | 5 -----
 1 file changed, 5 deletions(-)

diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c
index d2b4d9a658a6a9..33c4747bbef488 100644
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -5084,13 +5084,8 @@ find_first_nonascii(const unsigned char *start, const unsigned char *end)
 #if PY_LITTLE_ENDIAN && HAVE_CTZ
         if (p < p2) {
             size_t u;
-#if defined(_M_AMD64) || defined(_M_IX86) || defined(__x86_64__) || defined(__i386__)
-            // x86 and amd64 are little endian and can load unaligned memory.
             memcpy(&u, p, sizeof(size_t));
             u &= ASCII_CHAR_MASK;
-#else
-            u = load_unaligned(p, p2 - p) & ASCII_CHAR_MASK;
-#endif
             if (u) {
                 return (ctz(u) - 7) / 8;
             }