Skip to content

Commit f241a5b

Browse files
committed
utf8.c: Remove intermediate value
By not overwriting the computed value of malformed input with `UNICODE_REPLACEMENT` until later in the function, we can eliminate the temporary variable `uv_so_far`. This paves the way to a much bigger simplification in the next commit.
1 parent 15454ad commit f241a5b

File tree

1 file changed

+4
-9
lines changed

1 file changed

+4
-9
lines changed

utf8.c

Lines changed: 4 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -1377,7 +1377,6 @@ Perl__utf8n_to_uvchr_msgs_helper(const U8 *s,
13771377
U8 * adjusted_s0;
13781378
U8 temp_char_buf[UTF8_MAXBYTES + 1]; /* Used to avoid a Newx in this
13791379
routine; see [perl #130921] */
1380-
UV uv_so_far;
13811380
dTHX;
13821381

13831382
PERL_ARGS_ASSERT__UTF8N_TO_UVCHR_MSGS_HELPER;
@@ -1421,7 +1420,6 @@ Perl__utf8n_to_uvchr_msgs_helper(const U8 *s,
14211420
avail_len = 0;
14221421
discard_errors = 0;
14231422
adjusted_s0 = (U8 *) s0;
1424-
uv_so_far = 0;
14251423

14261424
if (errors) {
14271425
*errors = 0;
@@ -1534,16 +1532,10 @@ Perl__utf8n_to_uvchr_msgs_helper(const U8 *s,
15341532
* A convenience macro that matches either of the too-short conditions. */
15351533
# define UTF8_GOT_TOO_SHORT (UTF8_GOT_SHORT|UTF8_GOT_NON_CONTINUATION)
15361534

1537-
if (UNLIKELY(possible_problems & UTF8_GOT_TOO_SHORT)) {
1538-
uv_so_far = uv;
1539-
uv = UNICODE_REPLACEMENT;
1540-
}
1541-
15421535
/* Check for overflow. The algorithm requires us to not look past the end
15431536
* of the current character, even if partial, so the upper limit is 's' */
15441537
if (UNLIKELY(does_utf8_overflow(s0, s) >= ALMOST_CERTAINLY_OVERFLOWS)) {
15451538
possible_problems |= UTF8_GOT_OVERFLOW;
1546-
uv = UNICODE_REPLACEMENT;
15471539
}
15481540

15491541
/* Check for overlong. If no problems so far, 'uv' is the correct code
@@ -1566,7 +1558,7 @@ Perl__utf8n_to_uvchr_msgs_helper(const U8 *s,
15661558
* cases */
15671559
&& LIKELY(! (possible_problems & UTF8_GOT_OVERFLOW)))
15681560
{
1569-
UV min_uv = uv_so_far;
1561+
UV min_uv = uv;
15701562
STRLEN i;
15711563

15721564
/* Here, the input is both overlong and is missing some trailing
@@ -1714,6 +1706,7 @@ Perl__utf8n_to_uvchr_msgs_helper(const U8 *s,
17141706
* extended UTF-8, but we handle all three cases here */
17151707
possible_problems &= ~(UTF8_GOT_SUPER|UTF8_GOT_PERL_EXTENDED);
17161708
*errors |= UTF8_GOT_OVERFLOW;
1709+
uv = UNICODE_REPLACEMENT;
17171710

17181711
/* But the API says we flag all errors found */
17191712
if (flags & (UTF8_WARN_SUPER|UTF8_DISALLOW_SUPER)) {
@@ -1807,6 +1800,7 @@ Perl__utf8n_to_uvchr_msgs_helper(const U8 *s,
18071800

18081801
case UTF8_GOT_SHORT:
18091802
*errors |= UTF8_GOT_SHORT;
1803+
uv = UNICODE_REPLACEMENT;
18101804

18111805
if (! (flags & UTF8_ALLOW_SHORT)) {
18121806
disallowed = TRUE;
@@ -1829,6 +1823,7 @@ Perl__utf8n_to_uvchr_msgs_helper(const U8 *s,
18291823

18301824
case UTF8_GOT_NON_CONTINUATION:
18311825
*errors |= UTF8_GOT_NON_CONTINUATION;
1826+
uv = UNICODE_REPLACEMENT;
18321827

18331828
if (! (flags & UTF8_ALLOW_NON_CONTINUATION)) {
18341829
disallowed = TRUE;

0 commit comments

Comments
 (0)