diff --git a/embed.fnc b/embed.fnc
index 470deba3ac8f..8eb86869bbee 100644
--- a/embed.fnc
+++ b/embed.fnc
@@ -4010,6 +4010,8 @@ CRTdip	|UV	|valid_utf8_to_uv					\
 CRTdmp	|UV	|valid_utf8_to_uvchr					\
 				|NN const U8 *s 			\
 				|NULLOK STRLEN *retlen
+CRTip	|unsigned int|variant_byte_number				\
+				|PERL_UINTMAX_T word
 Adp	|int	|vcmp		|NN SV *lhv				\
 				|NN SV *rhv
 Adpr	|void	|vcroak 	|NULLOK const char *pat 		\
@@ -4108,10 +4110,6 @@ TXp	|void	|set_padlist	|NN CV *cv				\
 : Used in sv.c
 p	|void	|dump_sv_child	|NN SV *sv
 #endif
-#if !defined(EBCDIC)
-CRTip	|unsigned int|variant_byte_number				\
-				|PERL_UINTMAX_T word
-#endif
 #if defined(F_FREESP) && !defined(HAS_CHSIZE) && !defined(HAS_TRUNCATE)
 ARdp	|I32	|my_chsize	|int fd 				\
 				|Off_t length
diff --git a/embed.h b/embed.h
index 7394c28366f1..42a12ea3dc79 100644
--- a/embed.h
+++ b/embed.h
@@ -848,6 +848,7 @@
 # define valid_identifier_sv(a)                 Perl_valid_identifier_sv(aTHX_ a)
 # define valid_utf8_to_uv                       Perl_valid_utf8_to_uv
 # define Perl_valid_utf8_to_uvchr               valid_utf8_to_uvchr
+# define variant_byte_number                    Perl_variant_byte_number
 # define vcmp(a,b)                              Perl_vcmp(aTHX_ a,b)
 # define vcroak(a,b)                            Perl_vcroak(aTHX_ a,b)
 # define vdeb(a,b)                              Perl_vdeb(aTHX_ a,b)
@@ -874,9 +875,6 @@
 #   define pad_setsv(a,b)                       Perl_pad_setsv(aTHX_ a,b)
 #   define pad_sv(a)                            Perl_pad_sv(aTHX_ a)
 # endif
-# if !defined(EBCDIC)
-#   define variant_byte_number                  Perl_variant_byte_number
-# endif
 # if defined(F_FREESP) && !defined(HAS_CHSIZE) && !defined(HAS_TRUNCATE)
 #   define my_chsize(a,b)                       Perl_my_chsize(aTHX_ a,b)
 # endif
diff --git a/inline.h b/inline.h
index af1ec26c8b56..e66e9f7034a5 100644
--- a/inline.h
+++ b/inline.h
@@ -1361,7 +1361,7 @@ Perl_valid_utf8_to_uv(const U8 *s, STRLEN *retlen)
     /* Note that this is branchless except for the switch() jump table, and
      * checking that the caller wants a *retlen returned.
      *
-     * There is wasted effort for length 1 inputs of initializing 'uv' to 0 
+     * There is wasted effort for length 1 inputs of initializing 'uv' to 0
      * and calculating 'full_shift' (unless the compiler optimizes that out).
      * Benchmarks indicate this is acceptable.
      * See GH #23690 */
@@ -1459,14 +1459,60 @@ Perl_valid_utf8_to_uv(const U8 *s, STRLEN *retlen)
 #  define PERL_WORDSIZE            sizeof(PERL_UINTMAX_T)
 #  define PERL_WORD_BOUNDARY_MASK (PERL_WORDSIZE - 1)
 
-/* Evaluates to 0 if 'x' is at a word boundary; otherwise evaluates to 1, by
- * or'ing together the lowest bits of 'x'.  Hopefully the final term gets
- * optimized out completely on a 32-bit system, and its mask gets optimized out
- * on a 64-bit system */
-#  define PERL_IS_SUBWORD_ADDR(x) (1 & (       PTR2nat(x)                     \
-                                      |   (  PTR2nat(x) >> 1)                 \
-                                      | ( ( (PTR2nat(x)                       \
-                                           & PERL_WORD_BOUNDARY_MASK) >> 2))))
+/* Given an address of a byte 'x', how many bytes away is that address to the
+ * following closest full word boundary. */
+#  define BYTES_REMAINING_IN_WORD(x)                                        \
+              ( (PERL_WORDSIZE - (PTR2nat(x) & PERL_WORD_BOUNDARY_MASK))    \
+               & PERL_WORD_BOUNDARY_MASK)
+/* For example, consider two addresses in an 8 byte word size (the dots are
+ * don't cares):
+ *      0b...............010                0b...............000
+ *      ((8 - (0b1101010 & 0x7)) & 0x7)     ((8 - (0b1101000 & 0x7)) & 0x7)
+ *      ((8 - 0b10) & 0x7)                  ((8 - 0) & 0x7)
+ *      (6 & 0x7)                           (8 & 0x7)
+ *      6                                   0                              */
+
+/* Some tasks that are byte-oriented can be done as well a full word-at-a-time,
+ * running 8 times faster on an 8-byte word, for example.  But there is
+ * generally extra setup required to do this, and byte-at-a-time must be used
+ * anyway to get to the next word boundary.  This macro calculates whether the
+ * trade-off is worth doing.  If not, it returns NULL; if so, it returns a
+ * pointer to the first byte of the next word.  Code using this is typically
+ * structured like:
+ *      U8 * next_word_boundary = WORTH_PER_LOOP()
+ *      if (next_word_boundary) {
+ *          loop per-byte until next_word_boundary
+ *          loop per-word until less than a word left before upper boundary
+ *      }
+ *      loop per-byte until reach final boundary
+ *
+ * 's' is the current position in the string
+ * 'e' is the upper string bound
+ * 'full_words_needed' is the caller's determination of where to make the
+ *      trade-off between per-byte and per-word.  Only if the number of words
+ *      in the input string is at least this many, does the macro return
+ *      non-NULL.
+ *
+ * Because of EBCDIC, there are two forms of this macro.
+ * WORTH_PER_WORD_LOOP_BINMODE() is for use when the data being examined is
+ * not dependent on the character set.  The more usual form is plain
+ * WORTH_PER_WORD_LOOP() for character data.  Because EBCDIC needs an extra
+ * transformation, per-word operations are not appropriate on it, so the macro
+ * always returns NULL, meaning don't use a per-word loop on an EBCDIC
+ * platform. */
+#  define WORTH_PER_WORD_LOOP_BINMODE(s, e, full_words_needed)      \
+       /* Note multiple evaluations of 's' */                       \
+       ( ( ( (s) + BYTES_REMAINING_IN_WORD(s)                       \
+                 + (full_words_needed) * PERL_WORDSIZE) < (e) )     \
+        ? ((s) + BYTES_REMAINING_IN_WORD(s))                        \
+        : NULL)
+
+#  ifdef EBCDIC
+#    define WORTH_PER_WORD_LOOP(s, e, f)  NULL
+#  else
+#    define WORTH_PER_WORD_LOOP(s, e, f)                \
+        WORTH_PER_WORD_LOOP_BINMODE(s, e, f)
+#  endif
 
 /*
 =for apidoc      is_utf8_invariant_string
@@ -1515,35 +1561,22 @@ C<L</is_utf8_string>> and C<L</is_utf8_fixed_width_buf_flags>>.
 PERL_STATIC_INLINE bool
 Perl_is_utf8_invariant_string_loc(const U8* const s, STRLEN len, const U8 ** ep)
 {
-    const U8* send;
-    const U8* x = s;
-
     PERL_ARGS_ASSERT_IS_UTF8_INVARIANT_STRING_LOC;
 
+    const U8* send = s + len;
+    const U8* x = s;
+
     if (len == 0) {
         len = strlen((const char *)s);
     }
 
-    send = s + len;
-
-#ifndef EBCDIC
-
     /* Do the word-at-a-time iff there is at least one usable full word.  That
      * means that after advancing to a word boundary, there still is at least a
-     * full word left.  The number of bytes needed to advance is 'wordsize -
-     * offset' unless offset is 0. */
-    if ((STRLEN) (send - x) >= PERL_WORDSIZE
-
-                            /* This term is wordsize if subword; 0 if not */
-                          + PERL_WORDSIZE * PERL_IS_SUBWORD_ADDR(x)
+     * full word left. */
+    const U8 * const per_byte_end = WORTH_PER_WORD_LOOP(x, send, 1);
 
-                            /* 'offset' */
-                          - (PTR2nat(x) & PERL_WORD_BOUNDARY_MASK))
-    {
-
-        /* Process per-byte until reach word boundary.  XXX This loop could be
-         * eliminated if we knew that this platform had fast unaligned reads */
-        while (PTR2nat(x) & PERL_WORD_BOUNDARY_MASK) {
+    if (per_byte_end) {
+        while (x < per_byte_end ) {
             if (! UTF8_IS_INVARIANT(*x)) {
                 if (ep) {
                     *ep = x;
@@ -1585,8 +1618,6 @@ Perl_is_utf8_invariant_string_loc(const U8* const s, STRLEN len, const U8 ** ep)
         } while (x + PERL_WORDSIZE <= send);
     }
 
-#endif      /* End of ! EBCDIC */
-
     /* Process per-byte.  (Can't use libc functions like strpbrk() because
      * input isn't necessarily a C string) */
     while (x < send) {
@@ -1977,13 +2008,20 @@ Perl_single_1bit_pos32(U32 word)
 
 }
 
-#ifndef EBCDIC
+/* Returns the byte number of the lowest numbered-byte whose uppermost bit is
+ * set */
+#define first_upper_bit_set_byte_number(word) Perl_variant_byte_number(word)
 
 PERL_STATIC_INLINE unsigned int
 Perl_variant_byte_number(PERL_UINTMAX_T word)
 {
-    /* This returns the position in a word (0..7) of the first variant byte in
-     * it.  This is a helper function.  Note that there are no branches */
+    /* This returns the position in a word (0..7) of the first byte whose
+     * uppermost bit is set.  On ASCII boxes, this is equivalent to the first
+     * byte whose representation is different in UTF-8 vs not, hence the name
+     * and text in the comments.  It was only later that this was used for
+     * binary data, not tied to the character set.
+     *
+     * This is a helper function.  Note that there are no branches */
 
     /* Get just the msb bits of each byte */
     word &= PERL_VARIANTS_WORD_MASK;
@@ -1992,7 +2030,7 @@ Perl_variant_byte_number(PERL_UINTMAX_T word)
      * word */
     assert(word);
 
-#  if BYTEORDER == 0x1234 || BYTEORDER == 0x12345678
+#if BYTEORDER == 0x1234 || BYTEORDER == 0x12345678
 
     /* Bytes are stored like
      *  Byte8 ... Byte2 Byte1
@@ -2005,7 +2043,7 @@ Perl_variant_byte_number(PERL_UINTMAX_T word)
      * to 0..7 */
     return (unsigned int) ((word + 1) >> 3) - 1;
 
-#  elif BYTEORDER == 0x4321 || BYTEORDER == 0x87654321
+#elif BYTEORDER == 0x4321 || BYTEORDER == 0x87654321
 
     /* Bytes are stored like
      *  Byte1 Byte2  ... Byte8
@@ -2024,13 +2062,24 @@ Perl_variant_byte_number(PERL_UINTMAX_T word)
 
     return (unsigned int) word;
 
-#  else
-#    error Unexpected byte order
-#  endif
+#else   /* Unhandled byte-order; the compiler knows which comes first */
 
-}
+    const U8 * bytes = (U8 *) &word;
+    for (unsigned int i = 0; i <  sizeof(word); i++) {
+        if (bytes[i]) {
+            return i;
+        }
+    }
+
+    assert(0);
+
+    /* If all else fails, it's better to return something than just random */
+    return 0;
 
 #endif
+
+}
+
 #if defined(PERL_CORE) || defined(PERL_EXT)
 
 /*
@@ -2063,23 +2112,16 @@ C<L<perlapi/is_utf8_invariant_string_loc>>,
 PERL_STATIC_INLINE Size_t
 S_variant_under_utf8_count(const U8* const s, const U8* const e)
 {
-    const U8* x = s;
-    Size_t count = 0;
-
     PERL_ARGS_ASSERT_VARIANT_UNDER_UTF8_COUNT;
 
-#  ifndef EBCDIC
+    const U8* x = s;
+    Size_t count = 0;
 
     /* Test if the string is long enough to use word-at-a-time.  (Logic is the
      * same as for is_utf8_invariant_string()) */
-    if ((STRLEN) (e - x) >= PERL_WORDSIZE
-                          + PERL_WORDSIZE * PERL_IS_SUBWORD_ADDR(x)
-                          - (PTR2nat(x) & PERL_WORD_BOUNDARY_MASK))
-    {
-
-        /* Process per-byte until reach word boundary.  XXX This loop could be
-         * eliminated if we knew that this platform had fast unaligned reads */
-        while (PTR2nat(x) & PERL_WORD_BOUNDARY_MASK) {
+    const U8 * const per_byte_end = WORTH_PER_WORD_LOOP(x, e, 1);
+    if (per_byte_end) {
+        while (x < per_byte_end ) {
             count += ! UTF8_IS_INVARIANT(*x++);
         }
 
@@ -2095,8 +2137,6 @@ S_variant_under_utf8_count(const U8* const s, const U8* const e)
         } while (x + PERL_WORDSIZE <= e);
     }
 
-#  endif
-
     /* Process per-byte */
     while (x < e) {
         if (! UTF8_IS_INVARIANT(*x)) {
@@ -2117,6 +2157,7 @@ S_variant_under_utf8_count(const U8* const s, const U8* const e)
 #  undef PERL_COUNT_MULTIPLIER
 #  undef PERL_WORD_BOUNDARY_MASK
 #  undef PERL_VARIANTS_WORD_MASK
+#  undef BYTES_REMAINING_IN_WORD
 #endif
 
 #define is_utf8_string(s, len)  is_utf8_string_loclen(s, len, NULL, NULL)
@@ -3284,7 +3325,7 @@ Perl_utf8_to_uv_msgs(const U8 * const s0,
          *
          * The terminology of the dfa refers to a 'class'.  The variable 'type'
          * would have been named 'class' except that is a reserved word in C++
-         * 
+         *
          * The table can be a U16 on EBCDIC platforms, so 'state' is declared
          * as U16; 'type' is likely to never occupy more than 5 bits.  */
         PERL_UINT_FAST8_T type = PL_strict_utf8_dfa_tab[*s];
@@ -4758,7 +4799,7 @@ extracted from C<sv> using L</C<SvPV_const>>.  C<sv> must not be NULL.
 Memory deallocation
 
 To prevent memory leaks, the memory allocated for the new string needs to be
-freed when no longer needed.  
+freed when no longer needed.
 
 =over
 
diff --git a/proto.h b/proto.h
index 0703f6997a28..7aabf8d17f1c 100644
--- a/proto.h
+++ b/proto.h
@@ -5698,16 +5698,6 @@ Perl_dump_sv_child(pTHX_ SV *sv)
 # define PERL_ARGS_ASSERT_DUMP_SV_CHILD         \
         assert(sv)
 
-#endif
-#if !defined(EBCDIC)
-
-# if !defined(PERL_NO_INLINE_FUNCTIONS)
-PERL_STATIC_INLINE unsigned int
-Perl_variant_byte_number(PERL_UINTMAX_T word)
-        __attribute__warn_unused_result__;
-#   define PERL_ARGS_ASSERT_VARIANT_BYTE_NUMBER
-
-# endif
 #endif
 #if defined(F_FREESP) && !defined(HAS_CHSIZE) && !defined(HAS_TRUNCATE)
 PERL_CALLCONV I32
@@ -10390,6 +10380,11 @@ Perl_valid_utf8_to_uv(const U8 *s, STRLEN *retlen)
 # define PERL_ARGS_ASSERT_VALID_UTF8_TO_UV      \
         assert(s)
 
+PERL_STATIC_INLINE unsigned int
+Perl_variant_byte_number(PERL_UINTMAX_T word)
+        __attribute__warn_unused_result__;
+# define PERL_ARGS_ASSERT_VARIANT_BYTE_NUMBER
+
 PERL_STATIC_INLINE void
 Perl_cx_popblock(pTHX_ PERL_CONTEXT *cx);
 # define PERL_ARGS_ASSERT_CX_POPBLOCK           \
diff --git a/regexec.c b/regexec.c
index d5e14c51beab..d04eb96a4305 100644
--- a/regexec.c
+++ b/regexec.c
@@ -632,15 +632,9 @@ S_find_span_end(U8 * s, const U8 * send, const U8 span_byte)
      * 'send-1' inclusive that isn't 'span_byte'; returns 'send' if none found.
      * */
 
-    if ((STRLEN) (send - s) >= PERL_WORDSIZE
-                          + PERL_WORDSIZE * PERL_IS_SUBWORD_ADDR(s)
-                          - (PTR2nat(s) & PERL_WORD_BOUNDARY_MASK))
-    {
-        PERL_UINTMAX_T span_word;
-
-        /* Process per-byte until reach word boundary.  XXX This loop could be
-         * eliminated if we knew that this platform had fast unaligned reads */
-        while (PTR2nat(s) & PERL_WORD_BOUNDARY_MASK) {
+    const U8 * const per_byte_end = WORTH_PER_WORD_LOOP_BINMODE(s, send, 1);
+    if (per_byte_end) {
+        while (s < per_byte_end ) {
             if (*s != span_byte) {
                 return s;
             }
@@ -648,7 +642,7 @@ S_find_span_end(U8 * s, const U8 * send, const U8 span_byte)
         }
 
         /* Create a word filled with the bytes we are spanning */
-        span_word = PERL_COUNT_MULTIPLIER * span_byte;
+        PERL_UINTMAX_T span_word = PERL_COUNT_MULTIPLIER * span_byte;
 
         /* Process per-word as long as we have at least a full word left */
         do {
@@ -659,15 +653,9 @@ S_find_span_end(U8 * s, const U8 * send, const U8 span_byte)
                 continue;
             }
 
-            /* Here, at least one byte in the word isn't 'span_byte'. */
-
-#ifdef EBCDIC
-
-            break;
-
-#else
-
-            /* This xor leaves 1 bits only in those non-matching bytes */
+            /* Here, at least one byte in the word isn't 'span_byte'.
+             *
+             * This xor leaves 1 bits only in those non-matching bytes */
             span_word ^= * (PERL_UINTMAX_T *) s;
 
             /* Make sure the upper bit of each non-matching byte is set.  This
@@ -677,10 +665,7 @@ S_find_span_end(U8 * s, const U8 * send, const U8 span_byte)
             span_word |= span_word << 4;
 
             /* That reduces the problem to what this function solves */
-            return s + variant_byte_number(span_word);
-
-#endif
-
+            return s + first_upper_bit_set_byte_number(span_word);
         } while (s + PERL_WORDSIZE <= send);
     }
 
@@ -706,23 +691,17 @@ S_find_next_masked(U8 * s, const U8 * send, const U8 byte, const U8 mask)
      * returns 'send' if none found.  It uses word-level operations instead of
      * byte to speed up the process */
 
-#ifndef EBCDIC
-
-    if ((STRLEN) (send - s) >= PERL_WORDSIZE
-                          + PERL_WORDSIZE * PERL_IS_SUBWORD_ADDR(s)
-                          - (PTR2nat(s) & PERL_WORD_BOUNDARY_MASK))
-    {
-        PERL_UINTMAX_T word, mask_word;
-
-        while (PTR2nat(s) & PERL_WORD_BOUNDARY_MASK) {
+    const U8 * const per_byte_end = WORTH_PER_WORD_LOOP(s, send, 1);
+    if (per_byte_end) {
+        while (s < per_byte_end ) {
             if (((*s) & mask) == byte) {
                 return s;
             }
             s++;
         }
 
-        word      = PERL_COUNT_MULTIPLIER * byte;
-        mask_word = PERL_COUNT_MULTIPLIER * mask;
+        PERL_UINTMAX_T word = PERL_COUNT_MULTIPLIER * byte;
+        PERL_UINTMAX_T mask_word = PERL_COUNT_MULTIPLIER * mask;
 
         do {
             PERL_UINTMAX_T masked = (* (PERL_UINTMAX_T *) s) & mask_word;
@@ -759,8 +738,6 @@ S_find_next_masked(U8 * s, const U8 * send, const U8 byte, const U8 mask)
         } while (s + PERL_WORDSIZE <= send);
     }
 
-#endif
-
     while (s < send) {
         if (((*s) & mask) == byte) {
             return s;
@@ -783,21 +760,17 @@ S_find_span_end_mask(U8 * s, const U8 * send, const U8 span_byte, const U8 mask)
      * function.  Returns 'send' if none found.  Works like find_span_end(),
      * except for the AND */
 
-    if ((STRLEN) (send - s) >= PERL_WORDSIZE
-                          + PERL_WORDSIZE * PERL_IS_SUBWORD_ADDR(s)
-                          - (PTR2nat(s) & PERL_WORD_BOUNDARY_MASK))
-    {
-        PERL_UINTMAX_T span_word, mask_word;
-
-        while (PTR2nat(s) & PERL_WORD_BOUNDARY_MASK) {
+    const U8 * const per_byte_end = WORTH_PER_WORD_LOOP_BINMODE(s, send, 1);
+    if (per_byte_end) {
+        while (s < per_byte_end ) {
             if (((*s) & mask) != span_byte) {
                 return s;
             }
             s++;
         }
 
-        span_word = PERL_COUNT_MULTIPLIER * span_byte;
-        mask_word = PERL_COUNT_MULTIPLIER * mask;
+        PERL_UINTMAX_T span_word = PERL_COUNT_MULTIPLIER * span_byte;
+        PERL_UINTMAX_T mask_word = PERL_COUNT_MULTIPLIER * mask;
 
         do {
             PERL_UINTMAX_T masked = (* (PERL_UINTMAX_T *) s) & mask_word;
@@ -807,20 +780,11 @@ S_find_span_end_mask(U8 * s, const U8 * send, const U8 span_byte, const U8 mask)
                 continue;
             }
 
-#ifdef EBCDIC
-
-            break;
-
-#else
-
             masked ^= span_word;
             masked |= masked << 1;
             masked |= masked << 2;
             masked |= masked << 4;
-            return s + variant_byte_number(masked);
-
-#endif
-
+            return s + first_upper_bit_set_byte_number(masked);
         } while (s + PERL_WORDSIZE <= send);
     }
 
diff --git a/utf8.c b/utf8.c
index fdd7fc6f9f4d..5f67da67fe7d 100644
--- a/utf8.c
+++ b/utf8.c
@@ -2635,13 +2635,8 @@ Perl_utf8_length(pTHX_ const U8 * const s0, const U8 * const e)
      * cachegrind).  The number isn't critical, as at these sizes, the total
      * time spent isn't large either way */
 
-#ifndef EBCDIC
-
-    if (e - s0 < 96)
-
-#endif
-
-    {
+    const U8 * const per_byte_end = WORTH_PER_WORD_LOOP(s0, e, 12);
+    if (! per_byte_end) {
         while (s < e) { /* Count characters directly */
 
             /* Take extra care to not exceed 'e' (which would be undefined
@@ -2670,21 +2665,14 @@ Perl_utf8_length(pTHX_ const U8 * const s0, const U8 * const e)
         return s - s0;
     }
 
-#ifndef EBCDIC
-
     /* Count continuations, word-at-a-time.
      *
      * We need to stop before the final start character in order to
      * preserve the limited error checking that's always been done */
     const U8 * e_limit = e - UTF8_MAXBYTES;
 
-    /* Points to the first byte >=s which is positioned at a word boundary.  If
-     * s is on a word boundary, it is s, otherwise it is to the next word. */
-    const U8 * partial_word_end = s + PERL_WORDSIZE * PERL_IS_SUBWORD_ADDR(s)
-                                    - (PTR2nat(s) & PERL_WORD_BOUNDARY_MASK);
-
     /* Process up to a full word boundary. */
-    while (s < partial_word_end) {
+    while (s < per_byte_end ) {
         const Size_t skip = UTF8SKIP(s);
 
         continuations += skip - 1;
@@ -2692,8 +2680,8 @@ Perl_utf8_length(pTHX_ const U8 * const s0, const U8 * const e)
     }
 
     /* Adjust back down any overshoot */
-    continuations -= s - partial_word_end;
-    s = partial_word_end;
+    continuations -= s - per_byte_end;
+    s = per_byte_end;
 
     do { /* Process per-word */
 
@@ -2742,8 +2730,6 @@ Perl_utf8_length(pTHX_ const U8 * const s0, const U8 * const e)
         break;
     }
 
-#  endif
-
     if (LIKELY(e == s)) {
         return s - s0 - continuations;
     }
@@ -3014,15 +3000,11 @@ Perl_utf8_to_bytes_(pTHX_ U8 **s_ptr, STRLEN *lenp, void ** free_me,
     Size_t invariant_length = first_variant - s0;
     Size_t variant_count = 0;
 
-#ifndef EBCDIC      /* The below relies on the bit patterns of UTF-8 */
-
-    /* Do a first pass through the string to see if it actually is translatable
-     * into bytes, and if so, how big the result is.  On long strings this is
-     * done a word at a time, so is relatively quick. (There is some
-     * start-up/tear-down overhead with the per-word algorithm, so no real gain
+    /* There is some start-up/tear-down overhead with this, so no real gain
      * unless the remaining portion of the string is long enough.  The current
-     * value is just a guess.)  On EBCDIC, it's always per-byte. */
-    if ((send - s) > (ptrdiff_t) (5 * PERL_WORDSIZE)) {
+     * value is just a guess. */
+    U8 * const per_byte_end = WORTH_PER_WORD_LOOP(s, send, 5);
+    if (per_byte_end) {
 
         /* If the string contains any start byte besides C2 and C3, then it
          * isn't translatable into bytes */
@@ -3031,15 +3013,7 @@ Perl_utf8_to_bytes_(pTHX_ U8 **s_ptr, STRLEN *lenp, void ** free_me,
         const PERL_UINTMAX_T C2_mask = PERL_COUNT_MULTIPLIER * 0xC2;
         const PERL_UINTMAX_T FE_mask = PERL_COUNT_MULTIPLIER * 0xFE;
 
-        /* Points to the first byte >=s which is positioned at a word boundary.
-         * If s is on a word boundary, it is s, otherwise it is the first byte
-         * of the next word. */
-        U8 * partial_word_end = s + PERL_WORDSIZE * PERL_IS_SUBWORD_ADDR(s)
-                                - (PTR2nat(s) & PERL_WORD_BOUNDARY_MASK);
-
-        /* Here there is at least a full word beyond the first word boundary.
-         * Process up to that boundary. */
-        while (s < partial_word_end) {
+        while (s < per_byte_end ) {
             if (! UTF8_IS_INVARIANT(*s)) {
                 if (! UTF8_IS_NEXT_CHAR_DOWNGRADEABLE(s, send)) {
                     return false;
@@ -3053,7 +3027,7 @@ Perl_utf8_to_bytes_(pTHX_ U8 **s_ptr, STRLEN *lenp, void ** free_me,
         }
 
         /* Adjust back down any overshoot */
-        s = partial_word_end;
+        s = per_byte_end;
 
         /* Process per-word */
         do {
@@ -3116,8 +3090,7 @@ Perl_utf8_to_bytes_(pTHX_ U8 **s_ptr, STRLEN *lenp, void ** free_me,
         }
     }
 
-#endif
-    /* Do the straggler bytes beyond what the loop above did */
+    /* Do the straggler bytes beyond the final word boundary */
     while (s < send) {
         if (! UTF8_IS_INVARIANT(*s)) {
             if (! UTF8_IS_NEXT_CHAR_DOWNGRADEABLE(s, send)) {