@@ -1564,7 +1564,9 @@ Perl__utf8n_to_uvchr_msgs_helper(const U8 *s,
1564
1564
}
1565
1565
else {
1566
1566
/* See if the input has malformations besides possibly overlong */
1567
- if (UNLIKELY (possible_problems & ~UTF8_GOT_LONG )) {
1567
+ if ( UNLIKELY (possible_problems & ~UTF8_GOT_LONG )
1568
+ && LIKELY (flags & ~(UTF8_DISALLOW_NONCHAR |UTF8_WARN_NONCHAR )))
1569
+ {
1568
1570
1569
1571
/* Here, the input is malformed in some way besides possibly
1570
1572
* overlong, except it doesn't overflow. If you look at the
@@ -1576,6 +1578,10 @@ Perl__utf8n_to_uvchr_msgs_helper(const U8 *s,
1576
1578
* be enough information present to determine if what we have
1577
1579
* so far would, if filled out completely, be for one of these
1578
1580
* problematic code points we are being asked to check for.
1581
+ * But to determine if a code point is a non-character, we need
1582
+ * all bytes, so this effort would be wasted, hence the
1583
+ * conditional above excludes this step if those are the only
1584
+ * things being checked for.
1579
1585
*
1580
1586
* The range of surrogates is
1581
1587
* ASCII platforms EBCDIC I8
@@ -1601,12 +1607,7 @@ Perl__utf8n_to_uvchr_msgs_helper(const U8 *s,
1601
1607
* This is done by pretending the input was filled out to its
1602
1608
* full length with occurrences of the smallest continuation
1603
1609
* byte. For surrogates we could just look at the bytes, but
1604
- * this single algorithm works for both those and supers.
1605
- *
1606
- * To determine if a code point is a non-character, we need all
1607
- * bytes, so this effort is wasted if the caller is looking for
1608
- * just those, but that is unlikely; the two official Unicode
1609
- * restrictions include the other two. */
1610
+ * this single algorithm works for both those and supers. */
1610
1611
for (unsigned i = curlen ; i < expectlen ; i ++ ) {
1611
1612
uv = UTF8_ACCUMULATE (uv ,
1612
1613
I8_TO_NATIVE_UTF8 (UTF_MIN_CONTINUATION_BYTE ));
0 commit comments