Skip to content

Commit b60f610

Browse files
committed
Add ASSUMEs for UTF-8 byte lengths
The maximum number of bytes in a Perl extended UTF-8 character is 13 on ASCII platforms and 14 on EBCDIC. Yet in the cases changed by this commit, the variable that holds that length is a Size_t. Adding these ASSUMEs to the functions tells the compiler that the value lies in that narrow range, which may allow it to do some optimizations. I looked through the code base and found no other instances where such a small value could be stored in a full-width variable. With link-time optimization, an ASSUME may be helpful even in non-inline functions.
1 parent 73c046c commit b60f610

File tree

2 files changed

+8
-1
lines changed

2 files changed

+8
-1
lines changed

inline.h

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1333,6 +1333,7 @@ Perl_valid_utf8_to_uv(const U8 *s, STRLEN *retlen)
13331333
PERL_ARGS_ASSERT_VALID_UTF8_TO_UV;
13341334

13351335
const UV expectlen = UTF8SKIP(s);
1336+
ASSUME(inRANGE(expectlen, 1, UTF8_MAXBYTES));
13361337
const U8* send = s + expectlen;
13371338
UV uv = *s;
13381339

@@ -3213,6 +3214,7 @@ Perl_utf8_to_uv_msgs(const U8 * const s0,
32133214
if (LIKELY(state == 0)) {
32143215
if (advance_p) {
32153216
*advance_p = s - s0 + 1;
3217+
ASSUME(*advance_p <= UTF8_MAXBYTES);
32163218
}
32173219

32183220
*cp_p = UNI_TO_NATIVE(uv);
@@ -3221,7 +3223,10 @@ Perl_utf8_to_uv_msgs(const U8 * const s0,
32213223
}
32223224

32233225
/* Here is potentially problematic. Use the full mechanism */
3224-
return utf8_to_uv_msgs_helper_(s0, e, cp_p, advance_p, flags, errors, msgs);
3226+
bool success = utf8_to_uv_msgs_helper_(s0, e, cp_p, advance_p,
3227+
flags, errors, msgs);
3228+
ASSUME(advance_p == NULL || inRANGE(*advance_p, 1, UTF8_MAXBYTES));
3229+
return success;
32253230
}
32263231

32273232
PERL_STATIC_INLINE UV
@@ -3231,6 +3236,7 @@ Perl_utf8_to_uv_or_die(const U8 *s, const U8 *e, STRLEN *advance_p)
32313236

32323237
UV cp;
32333238
(void) utf8_to_uv_flags(s, e, &cp, advance_p, UTF8_DIE_IF_MALFORMED);
3239+
ASSUME(advance_p == NULL || inRANGE(*advance_p, 1, UTF8_MAXBYTES));
32343240
return cp;
32353241
}
32363242

utf8.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2568,6 +2568,7 @@ Perl_utf8_to_uv_msgs_helper_(const U8 * const s0,
25682568

25692569
if (advance_p) {
25702570
*advance_p = curlen;
2571+
ASSUME(inRANGE(*advance_p, 1, UTF8_MAXBYTES));
25712572
}
25722573

25732574
*cp_p = UNI_TO_NATIVE(uv);

0 commit comments

Comments
 (0)