From 27af83d2f897d1861d146761392dca8c1682ab1c Mon Sep 17 00:00:00 2001
From: Karl Williamson <khw@cpan.org>
Date: Wed, 22 Oct 2025 06:21:16 -0600
Subject: [PATCH 01/11] is_utf8_invariant_string_loc: mv declarations, do init

This moves two variable declarations, making the ARGS_ASSERT macro
first, and it combines one declaration with its initialization.
---
 inline.h | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/inline.h b/inline.h
index af1ec26c8b56..6dd41db9a229 100644
--- a/inline.h
+++ b/inline.h
@@ -1515,16 +1515,15 @@ C<L</is_utf8_string>> and C<L</is_utf8_fixed_width_buf_flags>>.
 PERL_STATIC_INLINE bool
 Perl_is_utf8_invariant_string_loc(const U8* const s, STRLEN len, const U8 ** ep)
 {
-    const U8* send;
-    const U8* x = s;
-
     PERL_ARGS_ASSERT_IS_UTF8_INVARIANT_STRING_LOC;
 
+    const U8* send = s + len;
+    const U8* x = s;
+
     if (len == 0) {
         len = strlen((const char *)s);
     }
 
-    send = s + len;
 
 #ifndef EBCDIC
 

From 3c4053dfbba44166813d43690ca3f7caeff12446 Mon Sep 17 00:00:00 2001
From: Karl Williamson <khw@cpan.org>
Date: Wed, 22 Oct 2025 06:05:44 -0600
Subject: [PATCH 02/11] variant_under_utf8_count: mv ARGS_ASSERT

Better to have this first in the function
---
 inline.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/inline.h b/inline.h
index 6dd41db9a229..b19c9c3b774c 100644
--- a/inline.h
+++ b/inline.h
@@ -2062,11 +2062,11 @@ C<L<perlapi/is_utf8_invariant_string_loc>>,
 PERL_STATIC_INLINE Size_t
 S_variant_under_utf8_count(const U8* const s, const U8* const e)
 {
+    PERL_ARGS_ASSERT_VARIANT_UNDER_UTF8_COUNT;
+
     const U8* x = s;
     Size_t count = 0;
 
-    PERL_ARGS_ASSERT_VARIANT_UNDER_UTF8_COUNT;
-
 #  ifndef EBCDIC
 
     /* Test if the string is long enough to use word-at-a-time.  (Logic is the

From f7dcc4f5e2f1097aaa73529b5fc287250fe5ee8a Mon Sep 17 00:00:00 2001
From: Karl Williamson <khw@cpan.org>
Date: Wed, 22 Oct 2025 06:23:26 -0600
Subject: [PATCH 03/11] regexec.c: Move declarations to the point of
 initialization

---
 regexec.c | 13 +++++--------
 1 file changed, 5 insertions(+), 8 deletions(-)

diff --git a/regexec.c b/regexec.c
index d5e14c51beab..cac29563b6bd 100644
--- a/regexec.c
+++ b/regexec.c
@@ -636,7 +636,6 @@ S_find_span_end(U8 * s, const U8 * send, const U8 span_byte)
                           + PERL_WORDSIZE * PERL_IS_SUBWORD_ADDR(s)
                           - (PTR2nat(s) & PERL_WORD_BOUNDARY_MASK))
     {
-        PERL_UINTMAX_T span_word;
 
         /* Process per-byte until reach word boundary.  XXX This loop could be
          * eliminated if we knew that this platform had fast unaligned reads */
@@ -648,7 +647,7 @@ S_find_span_end(U8 * s, const U8 * send, const U8 span_byte)
         }
 
         /* Create a word filled with the bytes we are spanning */
-        span_word = PERL_COUNT_MULTIPLIER * span_byte;
+        PERL_UINTMAX_T span_word = PERL_COUNT_MULTIPLIER * span_byte;
 
         /* Process per-word as long as we have at least a full word left */
         do {
@@ -712,7 +711,6 @@ S_find_next_masked(U8 * s, const U8 * send, const U8 byte, const U8 mask)
                           + PERL_WORDSIZE * PERL_IS_SUBWORD_ADDR(s)
                           - (PTR2nat(s) & PERL_WORD_BOUNDARY_MASK))
     {
-        PERL_UINTMAX_T word, mask_word;
 
         while (PTR2nat(s) & PERL_WORD_BOUNDARY_MASK) {
             if (((*s) & mask) == byte) {
@@ -721,8 +719,8 @@ S_find_next_masked(U8 * s, const U8 * send, const U8 byte, const U8 mask)
             s++;
         }
 
-        word      = PERL_COUNT_MULTIPLIER * byte;
-        mask_word = PERL_COUNT_MULTIPLIER * mask;
+        PERL_UINTMAX_T word = PERL_COUNT_MULTIPLIER * byte;
+        PERL_UINTMAX_T mask_word = PERL_COUNT_MULTIPLIER * mask;
 
         do {
             PERL_UINTMAX_T masked = (* (PERL_UINTMAX_T *) s) & mask_word;
@@ -787,7 +785,6 @@ S_find_span_end_mask(U8 * s, const U8 * send, const U8 span_byte, const U8 mask)
                           + PERL_WORDSIZE * PERL_IS_SUBWORD_ADDR(s)
                           - (PTR2nat(s) & PERL_WORD_BOUNDARY_MASK))
     {
-        PERL_UINTMAX_T span_word, mask_word;
 
         while (PTR2nat(s) & PERL_WORD_BOUNDARY_MASK) {
             if (((*s) & mask) != span_byte) {
@@ -796,8 +793,8 @@ S_find_span_end_mask(U8 * s, const U8 * send, const U8 span_byte, const U8 mask)
             s++;
         }
 
-        span_word = PERL_COUNT_MULTIPLIER * span_byte;
-        mask_word = PERL_COUNT_MULTIPLIER * mask;
+        PERL_UINTMAX_T span_word = PERL_COUNT_MULTIPLIER * span_byte;
+        PERL_UINTMAX_T mask_word = PERL_COUNT_MULTIPLIER * mask;
 
         do {
             PERL_UINTMAX_T masked = (* (PERL_UINTMAX_T *) s) & mask_word;

From c1b1e8b2bcd3da39dc362da3e75d9ed8f325cb69 Mon Sep 17 00:00:00 2001
From: Karl Williamson <khw@cpan.org>
Date: Tue, 21 Oct 2025 07:16:59 -0600
Subject: [PATCH 04/11] Create BYTES_REMAINING_IN_WORD()

This macro encapsulates the task of finding how far until the next word
boundary the passed-in address is.

There are several places that could use this, but instead of converting
use this in those places, the next commit will create macros that depend
on this one and those places will instead convert to use those other new
macros.
---
 inline.h | 24 ++++++++++++++++--------
 1 file changed, 16 insertions(+), 8 deletions(-)

diff --git a/inline.h b/inline.h
index b19c9c3b774c..d8ea259429fd 100644
--- a/inline.h
+++ b/inline.h
@@ -1459,14 +1459,21 @@ Perl_valid_utf8_to_uv(const U8 *s, STRLEN *retlen)
 #  define PERL_WORDSIZE            sizeof(PERL_UINTMAX_T)
 #  define PERL_WORD_BOUNDARY_MASK (PERL_WORDSIZE - 1)
 
-/* Evaluates to 0 if 'x' is at a word boundary; otherwise evaluates to 1, by
- * or'ing together the lowest bits of 'x'.  Hopefully the final term gets
- * optimized out completely on a 32-bit system, and its mask gets optimized out
- * on a 64-bit system */
-#  define PERL_IS_SUBWORD_ADDR(x) (1 & (       PTR2nat(x)                     \
-                                      |   (  PTR2nat(x) >> 1)                 \
-                                      | ( ( (PTR2nat(x)                       \
-                                           & PERL_WORD_BOUNDARY_MASK) >> 2))))
+/* Given an address of a byte 'x', how many bytes away is that address to the
+ * following closest full word boundary. */
+#  define BYTES_REMAINING_IN_WORD(x)                                        \
+              ( (PERL_WORDSIZE - (PTR2nat(x) & PERL_WORD_BOUNDARY_MASK))    \
+               & PERL_WORD_BOUNDARY_MASK)
+/* For example, consider two addresses in an 8 byte word size (the dots are
+ * don't cares):
+ *      0b...............010                0b...............000
+ *      ((8 - (0b1101010 & 0x7)) & 0x7)     ((8 - (0b1101000 & 0x7)) & 0x7)
+ *      ((8 - 0b10) & 0x7)                  ((8 - 0) & 0x7)
+ *      (6 & 0x7)                           (8 & 0x7)
+ *      6                                   0                              */
+
+/* Evaluates to 0 if 'x' is at a word boundary; otherwise evaluates to 1 */
+#  define PERL_IS_SUBWORD_ADDR(x) (BYTES_REMAINING_IN_WORD(x) != 0)
 
 /*
 =for apidoc      is_utf8_invariant_string
@@ -2116,6 +2123,7 @@ S_variant_under_utf8_count(const U8* const s, const U8* const e)
 #  undef PERL_COUNT_MULTIPLIER
 #  undef PERL_WORD_BOUNDARY_MASK
 #  undef PERL_VARIANTS_WORD_MASK
+#  undef BYTES_REMAINING_IN_WORD
 #endif
 
 #define is_utf8_string(s, len)  is_utf8_string_loclen(s, len, NULL, NULL)

From 591ae6435b032b108d6f36babcb2076aba36a935 Mon Sep 17 00:00:00 2001
From: Karl Williamson <khw@cpan.org>
Date: Wed, 22 Oct 2025 05:38:06 -0600
Subject: [PATCH 05/11] Create WORTH_PER_WORD_LOOP(),
 WORTH_PER_WORD_LOOP_BINMODE()

There are several places in the perl core that, for performance,  use
word-at-a-time operations on byte data when the data to be processed is
long enough to overcome the extra setup overhead required.

The code that does this is not immediately obvious, and is currently
repeated at each such place.

This macro creates two macros that encapsulate this logic, making each
place that uses them easier to read.

One macro is for data that isn't dependent on the character set.  The
other is for character data.  EBCDIC data is not suitable for per-word
operation, so the this macro always returns false on an EBCDIC platform.
This allows for the removal of some EBCDIC #ifdefs in our code base.
---
 inline.h | 42 ++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 42 insertions(+)

diff --git a/inline.h b/inline.h
index d8ea259429fd..cee0537206ea 100644
--- a/inline.h
+++ b/inline.h
@@ -1475,6 +1475,48 @@ Perl_valid_utf8_to_uv(const U8 *s, STRLEN *retlen)
 /* Evaluates to 0 if 'x' is at a word boundary; otherwise evaluates to 1 */
 #  define PERL_IS_SUBWORD_ADDR(x) (BYTES_REMAINING_IN_WORD(x) != 0)
 
+/* Some tasks that are byte-oriented can be done as well a full word-at-a-time,
+ * running 8 times faster on an 8-byte word, for example.  But there is
+ * generally extra setup required to do this, and byte-at-a-time must be used
+ * anyway to get to the next word boundary.  This macro calculates whether the
+ * trade-off is worth doing.  If not, it returns NULL; if so, it returns a
+ * pointer to the first byte of the next word.  Code using this is typically
+ * structured like:
+ *      U8 * next_word_boundary = WORTH_PER_LOOP()
+ *      if (next_word_boundary) {
+ *          loop per-byte until next_word_boundary
+ *          loop per-word until less than a word left before upper boundary
+ *      }
+ *      loop per-byte until reach final boundary
+ *
+ * 's' is the current position in the string
+ * 'e' is the upper string bound
+ * 'full_words_needed' is the caller's determination of where to make the
+ *      trade-off between per-byte and per-word.  Only if the number of words
+ *      in the input string is at least this many, does the macro return
+ *      non-NULL.
+ *
+ * Because of EBCDIC, there are two forms of this macro.
+ * WORTH_PER_WORD_LOOP_BINMODE() is for use when the data being examined is
+ * not dependent on the character set.  The more usual form is plain
+ * WORTH_PER_WORD_LOOP() for character data.  Because EBCDIC needs an extra
+ * transformation, per-word operations are not appropriate on it, so the macro
+ * always returns NULL, meaning don't use a per-word loop on an EBCDIC
+ * platform. */
+#  define WORTH_PER_WORD_LOOP_BINMODE(s, e, full_words_needed)      \
+       /* Note multiple evaluations of 's' */                       \
+       ( ( ( (s) + BYTES_REMAINING_IN_WORD(s)                       \
+                 + (full_words_needed) * PERL_WORDSIZE) < (e) )     \
+        ? ((s) + BYTES_REMAINING_IN_WORD(s))                        \
+        : NULL)
+
+#  ifdef EBCDIC
+#    define WORTH_PER_WORD_LOOP(s, e, f)  NULL
+#  else
+#    define WORTH_PER_WORD_LOOP(s, e, f)                \
+        WORTH_PER_WORD_LOOP_BINMODE(s, e, f)
+#  endif
+
 /*
 =for apidoc      is_utf8_invariant_string
 =for apidoc_item is_utf8_invariant_string_loc

From bd63974083bee926464b162ea50aad885faa21ce Mon Sep 17 00:00:00 2001
From: Karl Williamson <khw@cpan.org>
Date: Wed, 22 Oct 2025 06:29:32 -0600
Subject: [PATCH 06/11]  Use new WORTH_PER_WORD_LOOP()

 This converts the places that could benefit from this new macro (and
 its kin) to use them.
---
 inline.h  | 37 +++++++------------------------------
 regexec.c | 33 +++++++++------------------------
 utf8.c    | 49 +++++++++++--------------------------------------
 3 files changed, 27 insertions(+), 92 deletions(-)

diff --git a/inline.h b/inline.h
index cee0537206ea..101bd4dd4ca1 100644
--- a/inline.h
+++ b/inline.h
@@ -1573,25 +1573,13 @@ Perl_is_utf8_invariant_string_loc(const U8* const s, STRLEN len, const U8 ** ep)
         len = strlen((const char *)s);
     }
 
-
-#ifndef EBCDIC
-
     /* Do the word-at-a-time iff there is at least one usable full word.  That
      * means that after advancing to a word boundary, there still is at least a
-     * full word left.  The number of bytes needed to advance is 'wordsize -
-     * offset' unless offset is 0. */
-    if ((STRLEN) (send - x) >= PERL_WORDSIZE
-
-                            /* This term is wordsize if subword; 0 if not */
-                          + PERL_WORDSIZE * PERL_IS_SUBWORD_ADDR(x)
-
-                            /* 'offset' */
-                          - (PTR2nat(x) & PERL_WORD_BOUNDARY_MASK))
-    {
+     * full word left. */
+    const U8 * const per_byte_end = WORTH_PER_WORD_LOOP(x, send, 1);
 
-        /* Process per-byte until reach word boundary.  XXX This loop could be
-         * eliminated if we knew that this platform had fast unaligned reads */
-        while (PTR2nat(x) & PERL_WORD_BOUNDARY_MASK) {
+    if (per_byte_end) {
+        while (x < per_byte_end ) {
             if (! UTF8_IS_INVARIANT(*x)) {
                 if (ep) {
                     *ep = x;
@@ -1633,8 +1621,6 @@ Perl_is_utf8_invariant_string_loc(const U8* const s, STRLEN len, const U8 ** ep)
         } while (x + PERL_WORDSIZE <= send);
     }
 
-#endif      /* End of ! EBCDIC */
-
     /* Process per-byte.  (Can't use libc functions like strpbrk() because
      * input isn't necessarily a C string) */
     while (x < send) {
@@ -2116,18 +2102,11 @@ S_variant_under_utf8_count(const U8* const s, const U8* const e)
     const U8* x = s;
     Size_t count = 0;
 
-#  ifndef EBCDIC
-
     /* Test if the string is long enough to use word-at-a-time.  (Logic is the
      * same as for is_utf8_invariant_string()) */
-    if ((STRLEN) (e - x) >= PERL_WORDSIZE
-                          + PERL_WORDSIZE * PERL_IS_SUBWORD_ADDR(x)
-                          - (PTR2nat(x) & PERL_WORD_BOUNDARY_MASK))
-    {
-
-        /* Process per-byte until reach word boundary.  XXX This loop could be
-         * eliminated if we knew that this platform had fast unaligned reads */
-        while (PTR2nat(x) & PERL_WORD_BOUNDARY_MASK) {
+    const U8 * const per_byte_end = WORTH_PER_WORD_LOOP(x, e, 1);
+    if (per_byte_end) {
+        while (x < per_byte_end ) {
             count += ! UTF8_IS_INVARIANT(*x++);
         }
 
@@ -2143,8 +2122,6 @@ S_variant_under_utf8_count(const U8* const s, const U8* const e)
         } while (x + PERL_WORDSIZE <= e);
     }
 
-#  endif
-
     /* Process per-byte */
     while (x < e) {
         if (! UTF8_IS_INVARIANT(*x)) {
diff --git a/regexec.c b/regexec.c
index cac29563b6bd..53257a9fdca2 100644
--- a/regexec.c
+++ b/regexec.c
@@ -632,14 +632,9 @@ S_find_span_end(U8 * s, const U8 * send, const U8 span_byte)
      * 'send-1' inclusive that isn't 'span_byte'; returns 'send' if none found.
      * */
 
-    if ((STRLEN) (send - s) >= PERL_WORDSIZE
-                          + PERL_WORDSIZE * PERL_IS_SUBWORD_ADDR(s)
-                          - (PTR2nat(s) & PERL_WORD_BOUNDARY_MASK))
-    {
-
-        /* Process per-byte until reach word boundary.  XXX This loop could be
-         * eliminated if we knew that this platform had fast unaligned reads */
-        while (PTR2nat(s) & PERL_WORD_BOUNDARY_MASK) {
+    const U8 * const per_byte_end = WORTH_PER_WORD_LOOP_BINMODE(s, send, 1);
+    if (per_byte_end) {
+        while (s < per_byte_end ) {
             if (*s != span_byte) {
                 return s;
             }
@@ -705,14 +700,9 @@ S_find_next_masked(U8 * s, const U8 * send, const U8 byte, const U8 mask)
      * returns 'send' if none found.  It uses word-level operations instead of
      * byte to speed up the process */
 
-#ifndef EBCDIC
-
-    if ((STRLEN) (send - s) >= PERL_WORDSIZE
-                          + PERL_WORDSIZE * PERL_IS_SUBWORD_ADDR(s)
-                          - (PTR2nat(s) & PERL_WORD_BOUNDARY_MASK))
-    {
-
-        while (PTR2nat(s) & PERL_WORD_BOUNDARY_MASK) {
+    const U8 * const per_byte_end = WORTH_PER_WORD_LOOP(s, send, 1);
+    if (per_byte_end) {
+        while (s < per_byte_end ) {
             if (((*s) & mask) == byte) {
                 return s;
             }
@@ -757,8 +747,6 @@ S_find_next_masked(U8 * s, const U8 * send, const U8 byte, const U8 mask)
         } while (s + PERL_WORDSIZE <= send);
     }
 
-#endif
-
     while (s < send) {
         if (((*s) & mask) == byte) {
             return s;
@@ -781,12 +769,9 @@ S_find_span_end_mask(U8 * s, const U8 * send, const U8 span_byte, const U8 mask)
      * function.  Returns 'send' if none found.  Works like find_span_end(),
      * except for the AND */
 
-    if ((STRLEN) (send - s) >= PERL_WORDSIZE
-                          + PERL_WORDSIZE * PERL_IS_SUBWORD_ADDR(s)
-                          - (PTR2nat(s) & PERL_WORD_BOUNDARY_MASK))
-    {
-
-        while (PTR2nat(s) & PERL_WORD_BOUNDARY_MASK) {
+    const U8 * const per_byte_end = WORTH_PER_WORD_LOOP_BINMODE(s, send, 1);
+    if (per_byte_end) {
+        while (s < per_byte_end ) {
             if (((*s) & mask) != span_byte) {
                 return s;
             }
diff --git a/utf8.c b/utf8.c
index fdd7fc6f9f4d..4c6d1fe879f1 100644
--- a/utf8.c
+++ b/utf8.c
@@ -2635,13 +2635,8 @@ Perl_utf8_length(pTHX_ const U8 * const s0, const U8 * const e)
      * cachegrind).  The number isn't critical, as at these sizes, the total
      * time spent isn't large either way */
 
-#ifndef EBCDIC
-
-    if (e - s0 < 96)
-
-#endif
-
-    {
+    const U8 * const per_byte_end = WORTH_PER_WORD_LOOP(s0, e, 12);
+    if (! per_byte_end) {
         while (s < e) { /* Count characters directly */
 
             /* Take extra care to not exceed 'e' (which would be undefined
@@ -2670,21 +2665,14 @@ Perl_utf8_length(pTHX_ const U8 * const s0, const U8 * const e)
         return s - s0;
     }
 
-#ifndef EBCDIC
-
     /* Count continuations, word-at-a-time.
      *
      * We need to stop before the final start character in order to
      * preserve the limited error checking that's always been done */
     const U8 * e_limit = e - UTF8_MAXBYTES;
 
-    /* Points to the first byte >=s which is positioned at a word boundary.  If
-     * s is on a word boundary, it is s, otherwise it is to the next word. */
-    const U8 * partial_word_end = s + PERL_WORDSIZE * PERL_IS_SUBWORD_ADDR(s)
-                                    - (PTR2nat(s) & PERL_WORD_BOUNDARY_MASK);
-
     /* Process up to a full word boundary. */
-    while (s < partial_word_end) {
+    while (s < per_byte_end ) {
         const Size_t skip = UTF8SKIP(s);
 
         continuations += skip - 1;
@@ -2692,8 +2680,8 @@ Perl_utf8_length(pTHX_ const U8 * const s0, const U8 * const e)
     }
 
     /* Adjust back down any overshoot */
-    continuations -= s - partial_word_end;
-    s = partial_word_end;
+    continuations -= s - per_byte_end;
+    s = per_byte_end;
 
     do { /* Process per-word */
 
@@ -2742,8 +2730,6 @@ Perl_utf8_length(pTHX_ const U8 * const s0, const U8 * const e)
         break;
     }
 
-#  endif
-
     if (LIKELY(e == s)) {
         return s - s0 - continuations;
     }
@@ -3014,15 +3000,11 @@ Perl_utf8_to_bytes_(pTHX_ U8 **s_ptr, STRLEN *lenp, void ** free_me,
     Size_t invariant_length = first_variant - s0;
     Size_t variant_count = 0;
 
-#ifndef EBCDIC      /* The below relies on the bit patterns of UTF-8 */
-
-    /* Do a first pass through the string to see if it actually is translatable
-     * into bytes, and if so, how big the result is.  On long strings this is
-     * done a word at a time, so is relatively quick. (There is some
-     * start-up/tear-down overhead with the per-word algorithm, so no real gain
+    /* There is some start-up/tear-down overhead with this, so no real gain
      * unless the remaining portion of the string is long enough.  The current
-     * value is just a guess.)  On EBCDIC, it's always per-byte. */
-    if ((send - s) > (ptrdiff_t) (5 * PERL_WORDSIZE)) {
+     * value is just a guess. */
+    U8 * const per_byte_end = WORTH_PER_WORD_LOOP(s, send, 5);
+    if (per_byte_end) {
 
         /* If the string contains any start byte besides C2 and C3, then it
          * isn't translatable into bytes */
@@ -3031,15 +3013,7 @@ Perl_utf8_to_bytes_(pTHX_ U8 **s_ptr, STRLEN *lenp, void ** free_me,
         const PERL_UINTMAX_T C2_mask = PERL_COUNT_MULTIPLIER * 0xC2;
         const PERL_UINTMAX_T FE_mask = PERL_COUNT_MULTIPLIER * 0xFE;
 
-        /* Points to the first byte >=s which is positioned at a word boundary.
-         * If s is on a word boundary, it is s, otherwise it is the first byte
-         * of the next word. */
-        U8 * partial_word_end = s + PERL_WORDSIZE * PERL_IS_SUBWORD_ADDR(s)
-                                - (PTR2nat(s) & PERL_WORD_BOUNDARY_MASK);
-
-        /* Here there is at least a full word beyond the first word boundary.
-         * Process up to that boundary. */
-        while (s < partial_word_end) {
+        while (s < per_byte_end ) {
             if (! UTF8_IS_INVARIANT(*s)) {
                 if (! UTF8_IS_NEXT_CHAR_DOWNGRADEABLE(s, send)) {
                     return false;
@@ -3053,7 +3027,7 @@ Perl_utf8_to_bytes_(pTHX_ U8 **s_ptr, STRLEN *lenp, void ** free_me,
         }
 
         /* Adjust back down any overshoot */
-        s = partial_word_end;
+        s = per_byte_end;
 
         /* Process per-word */
         do {
@@ -3116,7 +3090,6 @@ Perl_utf8_to_bytes_(pTHX_ U8 **s_ptr, STRLEN *lenp, void ** free_me,
         }
     }
 
-#endif
     /* Do the straggler bytes beyond what the loop above did */
     while (s < send) {
         if (! UTF8_IS_INVARIANT(*s)) {

From e06f36f4e323e6f741a603c927c73c4b205a0443 Mon Sep 17 00:00:00 2001
From: Karl Williamson <khw@cpan.org>
Date: Wed, 22 Oct 2025 06:30:11 -0600
Subject: [PATCH 07/11] utf8.c: Clarify comment

---
 utf8.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/utf8.c b/utf8.c
index 4c6d1fe879f1..5f67da67fe7d 100644
--- a/utf8.c
+++ b/utf8.c
@@ -3090,7 +3090,7 @@ Perl_utf8_to_bytes_(pTHX_ U8 **s_ptr, STRLEN *lenp, void ** free_me,
         }
     }
 
-    /* Do the straggler bytes beyond what the loop above did */
+    /* Do the straggler bytes beyond the final word boundary */
     while (s < send) {
         if (! UTF8_IS_INVARIANT(*s)) {
             if (! UTF8_IS_NEXT_CHAR_DOWNGRADEABLE(s, send)) {

From b57a774a25c31904e1d99bbb66196d78f1b7fbb5 Mon Sep 17 00:00:00 2001
From: Karl Williamson <khw@cpan.org>
Date: Wed, 22 Oct 2025 06:33:40 -0600
Subject: [PATCH 08/11] Remove PERL_IS_SUBWORD_ADDR

This has been subsumed by BYTES_REMAINING_IN_WORD, and is no longer
used.
---
 inline.h | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/inline.h b/inline.h
index 101bd4dd4ca1..200e1f0283c2 100644
--- a/inline.h
+++ b/inline.h
@@ -1472,9 +1472,6 @@ Perl_valid_utf8_to_uv(const U8 *s, STRLEN *retlen)
  *      (6 & 0x7)                           (8 & 0x7)
  *      6                                   0                              */
 
-/* Evaluates to 0 if 'x' is at a word boundary; otherwise evaluates to 1 */
-#  define PERL_IS_SUBWORD_ADDR(x) (BYTES_REMAINING_IN_WORD(x) != 0)
-
 /* Some tasks that are byte-oriented can be done as well a full word-at-a-time,
  * running 8 times faster on an 8-byte word, for example.  But there is
  * generally extra setup required to do this, and byte-at-a-time must be used

From b302dbab0257a5d8bdc14842c072f75c280a2da0 Mon Sep 17 00:00:00 2001
From: Karl Williamson <khw@cpan.org>
Date: Wed, 22 Oct 2025 06:34:39 -0600
Subject: [PATCH 09/11] inline.h: Remove trailing blanks

---
 inline.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/inline.h b/inline.h
index 200e1f0283c2..8ed1af68e42d 100644
--- a/inline.h
+++ b/inline.h
@@ -1361,7 +1361,7 @@ Perl_valid_utf8_to_uv(const U8 *s, STRLEN *retlen)
     /* Note that this is branchless except for the switch() jump table, and
      * checking that the caller wants a *retlen returned.
      *
-     * There is wasted effort for length 1 inputs of initializing 'uv' to 0 
+     * There is wasted effort for length 1 inputs of initializing 'uv' to 0
      * and calculating 'full_shift' (unless the compiler optimizes that out).
      * Benchmarks indicate this is acceptable.
      * See GH #23690 */
@@ -3307,7 +3307,7 @@ Perl_utf8_to_uv_msgs(const U8 * const s0,
          *
          * The terminology of the dfa refers to a 'class'.  The variable 'type'
          * would have been named 'class' except that is a reserved word in C++
-         * 
+         *
          * The table can be a U16 on EBCDIC platforms, so 'state' is declared
          * as U16; 'type' is likely to never occupy more than 5 bits.  */
         PERL_UINT_FAST8_T type = PL_strict_utf8_dfa_tab[*s];
@@ -4781,7 +4781,7 @@ extracted from C<sv> using L</C<SvPV_const>>.  C<sv> must not be NULL.
 Memory deallocation
 
 To prevent memory leaks, the memory allocated for the new string needs to be
-freed when no longer needed.  
+freed when no longer needed.
 
 =over
 

From d734970c7fd13d8d57942f1ad8a732eb47955268 Mon Sep 17 00:00:00 2001
From: Karl Williamson <khw@cpan.org>
Date: Thu, 23 Oct 2025 08:22:46 -0600
Subject: [PATCH 10/11] variant_byte_number: Handle unusual byte ordering

Instead of refusing to compile, it is easy to handle this case.
---
 inline.h | 16 ++++++++++++++--
 1 file changed, 14 insertions(+), 2 deletions(-)

diff --git a/inline.h b/inline.h
index 8ed1af68e42d..ffc0b1bce609 100644
--- a/inline.h
+++ b/inline.h
@@ -2055,8 +2055,20 @@ Perl_variant_byte_number(PERL_UINTMAX_T word)
 
     return (unsigned int) word;
 
-#  else
-#    error Unexpected byte order
+#else   /* Unhandled byte-order; the compiler knows which comes first */
+
+    const U8 * bytes = (U8 *) &word;
+    for (unsigned int i = 0; i <  sizeof(word); i++) {
+        if (bytes[i]) {
+            return i;
+        }
+    }
+
+    assert(0);
+
+    /* If all else fails, it's better to return something than just random */
+    return 0;
+
 #  endif
 
 }

From 94a5ebf871ff8f1444e6b91ecf0bfbde93d644d4 Mon Sep 17 00:00:00 2001
From: Karl Williamson <khw@cpan.org>
Date: Thu, 23 Oct 2025 07:41:33 -0600
Subject: [PATCH 11/11] Remove some special EBCDIC code

The 'variant_byte_number' function was written to find the byte number
in a word of the first byte whose meaning varies depending on if the
string it is part of is encoded in UTF-8 or not.  On ASCII machines,
that is simply when the upper bit is set.  On EBCDIC machines, there is
no similar pattern, so this function hasn't been compiled on those.

A long time ago, I realized that this function could also handle binary
data by coercing that binary data into having the form of having that
bit set or not depending on the pattern being looked for, and then
calling that function.

But I actually hadn't realized until now that it was binary data not
tied to a character set that was being worked on.  This commit rectifies
that.  A new alias is added for that function that emphasizes that it
works on binary data, the function is now compiled for EBCDIC, and the
EBCDIC-only code that avoided using it is now removed.
---
 embed.fnc |  6 ++----
 embed.h   |  4 +---
 inline.h  | 20 +++++++++++++-------
 proto.h   | 15 +++++----------
 regexec.c | 28 +++++-----------------------
 5 files changed, 26 insertions(+), 47 deletions(-)

diff --git a/embed.fnc b/embed.fnc
index 470deba3ac8f..8eb86869bbee 100644
--- a/embed.fnc
+++ b/embed.fnc
@@ -4010,6 +4010,8 @@ CRTdip	|UV	|valid_utf8_to_uv					\
 CRTdmp	|UV	|valid_utf8_to_uvchr					\
 				|NN const U8 *s 			\
 				|NULLOK STRLEN *retlen
+CRTip	|unsigned int|variant_byte_number				\
+				|PERL_UINTMAX_T word
 Adp	|int	|vcmp		|NN SV *lhv				\
 				|NN SV *rhv
 Adpr	|void	|vcroak 	|NULLOK const char *pat 		\
@@ -4108,10 +4110,6 @@ TXp	|void	|set_padlist	|NN CV *cv				\
 : Used in sv.c
 p	|void	|dump_sv_child	|NN SV *sv
 #endif
-#if !defined(EBCDIC)
-CRTip	|unsigned int|variant_byte_number				\
-				|PERL_UINTMAX_T word
-#endif
 #if defined(F_FREESP) && !defined(HAS_CHSIZE) && !defined(HAS_TRUNCATE)
 ARdp	|I32	|my_chsize	|int fd 				\
 				|Off_t length
diff --git a/embed.h b/embed.h
index 7394c28366f1..42a12ea3dc79 100644
--- a/embed.h
+++ b/embed.h
@@ -848,6 +848,7 @@
 # define valid_identifier_sv(a)                 Perl_valid_identifier_sv(aTHX_ a)
 # define valid_utf8_to_uv                       Perl_valid_utf8_to_uv
 # define Perl_valid_utf8_to_uvchr               valid_utf8_to_uvchr
+# define variant_byte_number                    Perl_variant_byte_number
 # define vcmp(a,b)                              Perl_vcmp(aTHX_ a,b)
 # define vcroak(a,b)                            Perl_vcroak(aTHX_ a,b)
 # define vdeb(a,b)                              Perl_vdeb(aTHX_ a,b)
@@ -874,9 +875,6 @@
 #   define pad_setsv(a,b)                       Perl_pad_setsv(aTHX_ a,b)
 #   define pad_sv(a)                            Perl_pad_sv(aTHX_ a)
 # endif
-# if !defined(EBCDIC)
-#   define variant_byte_number                  Perl_variant_byte_number
-# endif
 # if defined(F_FREESP) && !defined(HAS_CHSIZE) && !defined(HAS_TRUNCATE)
 #   define my_chsize(a,b)                       Perl_my_chsize(aTHX_ a,b)
 # endif
diff --git a/inline.h b/inline.h
index ffc0b1bce609..e66e9f7034a5 100644
--- a/inline.h
+++ b/inline.h
@@ -2008,13 +2008,20 @@ Perl_single_1bit_pos32(U32 word)
 
 }
 
-#ifndef EBCDIC
+/* Returns the byte number of the lowest numbered-byte whose uppermost bit is
+ * set */
+#define first_upper_bit_set_byte_number(word) Perl_variant_byte_number(word)
 
 PERL_STATIC_INLINE unsigned int
 Perl_variant_byte_number(PERL_UINTMAX_T word)
 {
-    /* This returns the position in a word (0..7) of the first variant byte in
-     * it.  This is a helper function.  Note that there are no branches */
+    /* This returns the position in a word (0..7) of the first byte whose
+     * uppermost bit is set.  On ASCII boxes, this is equivalent to the first
+     * byte whose representation is different in UTF-8 vs not, hence the name
+     * and text in the comments.  It was only later that this was used for
+     * binary data, not tied to the character set.
+     *
+     * This is a helper function.  Note that there are no branches */
 
     /* Get just the msb bits of each byte */
     word &= PERL_VARIANTS_WORD_MASK;
@@ -2023,7 +2030,7 @@ Perl_variant_byte_number(PERL_UINTMAX_T word)
      * word */
     assert(word);
 
-#  if BYTEORDER == 0x1234 || BYTEORDER == 0x12345678
+#if BYTEORDER == 0x1234 || BYTEORDER == 0x12345678
 
     /* Bytes are stored like
      *  Byte8 ... Byte2 Byte1
@@ -2036,7 +2043,7 @@ Perl_variant_byte_number(PERL_UINTMAX_T word)
      * to 0..7 */
     return (unsigned int) ((word + 1) >> 3) - 1;
 
-#  elif BYTEORDER == 0x4321 || BYTEORDER == 0x87654321
+#elif BYTEORDER == 0x4321 || BYTEORDER == 0x87654321
 
     /* Bytes are stored like
      *  Byte1 Byte2  ... Byte8
@@ -2069,11 +2076,10 @@ Perl_variant_byte_number(PERL_UINTMAX_T word)
     /* If all else fails, it's better to return something than just random */
     return 0;
 
-#  endif
+#endif
 
 }
 
-#endif
 #if defined(PERL_CORE) || defined(PERL_EXT)
 
 /*
diff --git a/proto.h b/proto.h
index 0703f6997a28..7aabf8d17f1c 100644
--- a/proto.h
+++ b/proto.h
@@ -5698,16 +5698,6 @@ Perl_dump_sv_child(pTHX_ SV *sv)
 # define PERL_ARGS_ASSERT_DUMP_SV_CHILD         \
         assert(sv)
 
-#endif
-#if !defined(EBCDIC)
-
-# if !defined(PERL_NO_INLINE_FUNCTIONS)
-PERL_STATIC_INLINE unsigned int
-Perl_variant_byte_number(PERL_UINTMAX_T word)
-        __attribute__warn_unused_result__;
-#   define PERL_ARGS_ASSERT_VARIANT_BYTE_NUMBER
-
-# endif
 #endif
 #if defined(F_FREESP) && !defined(HAS_CHSIZE) && !defined(HAS_TRUNCATE)
 PERL_CALLCONV I32
@@ -10390,6 +10380,11 @@ Perl_valid_utf8_to_uv(const U8 *s, STRLEN *retlen)
 # define PERL_ARGS_ASSERT_VALID_UTF8_TO_UV      \
         assert(s)
 
+PERL_STATIC_INLINE unsigned int
+Perl_variant_byte_number(PERL_UINTMAX_T word)
+        __attribute__warn_unused_result__;
+# define PERL_ARGS_ASSERT_VARIANT_BYTE_NUMBER
+
 PERL_STATIC_INLINE void
 Perl_cx_popblock(pTHX_ PERL_CONTEXT *cx);
 # define PERL_ARGS_ASSERT_CX_POPBLOCK           \
diff --git a/regexec.c b/regexec.c
index 53257a9fdca2..d04eb96a4305 100644
--- a/regexec.c
+++ b/regexec.c
@@ -653,15 +653,9 @@ S_find_span_end(U8 * s, const U8 * send, const U8 span_byte)
                 continue;
             }
 
-            /* Here, at least one byte in the word isn't 'span_byte'. */
-
-#ifdef EBCDIC
-
-            break;
-
-#else
-
-            /* This xor leaves 1 bits only in those non-matching bytes */
+            /* Here, at least one byte in the word isn't 'span_byte'.
+             *
+             * This xor leaves 1 bits only in those non-matching bytes */
             span_word ^= * (PERL_UINTMAX_T *) s;
 
             /* Make sure the upper bit of each non-matching byte is set.  This
@@ -671,10 +665,7 @@ S_find_span_end(U8 * s, const U8 * send, const U8 span_byte)
             span_word |= span_word << 4;
 
             /* That reduces the problem to what this function solves */
-            return s + variant_byte_number(span_word);
-
-#endif
-
+            return s + first_upper_bit_set_byte_number(span_word);
         } while (s + PERL_WORDSIZE <= send);
     }
 
@@ -789,20 +780,11 @@ S_find_span_end_mask(U8 * s, const U8 * send, const U8 span_byte, const U8 mask)
                 continue;
             }
 
-#ifdef EBCDIC
-
-            break;
-
-#else
-
             masked ^= span_word;
             masked |= masked << 1;
             masked |= masked << 2;
             masked |= masked << 4;
-            return s + variant_byte_number(masked);
-
-#endif
-
+            return s + first_upper_bit_set_byte_number(masked);
         } while (s + PERL_WORDSIZE <= send);
     }