Skip to content

Commit a46ee10

Browse files
committed
toke.c: don't call libc's memcmp() to test 1 byte in Perl_scan_str()
delim_byte_len is almost always 1, and open_delim_str is almost always '"' or '\'' or something similar. I'm not sure exactly which string of PP code will make delim_byte_len something other than 1; that case is too rare to optimize for, but it still must be supported. So just test the char directly if its length is 1. Invoking libc memcmp() requires 4 ABI inputs on any CPU, and while most of the code paths above the memEQ() lines use constants directly initialized inside Perl_scan_str(), one branch uses "utf8_to_uv_or_die(,,&delim_byte_len)", which optimizes to Perl_utf8_to_uvchr_buf_helper(,,,&delim_byte_len), making the value in STRLEN delim_byte_len unbounded as far as any C compiler (CC) can tell. All CCs must assume the value Perl_utf8_to_uvchr_buf_helper() put inside delim_byte_len could be as large as a 4.7GB DVD or 25GB BD .iso file. Also put the retval of SvGROW() to use. Don't let the C auto var delim_byte_len escape via the "&" op through utf8_to_uv_or_die(). If it escapes, delim_byte_len can never be kept in a register again by any CC, and it must be reread from the C stack after every possible call.
1 parent 52d9c9c commit a46ee10

File tree

1 file changed

+20
-11
lines changed

1 file changed

+20
-11
lines changed

toke.c

Lines changed: 20 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -11645,9 +11645,12 @@ Perl_scan_str(pTHX_ char *start, int keep_bracketed_quoted, int keep_delims, int
1164511645
open_delim_str[0] = *s;
1164611646
delim_byte_len = 1;
1164711647
}
11648-
else {
11648+
else { /* don't let delim_byte_len escape and be a volatile mem addr */
11649+
STRLEN delim_byte_len_tmp;
1164911650
open_delim_code = utf8_to_uv_or_die((U8*)s, (U8*)PL_bufend,
11650-
&delim_byte_len);
11651+
&delim_byte_len_tmp);
11652+
/* CC can safely keep delim_byte_len in a register until the end */
11653+
delim_byte_len = delim_byte_len_tmp;
1165111654
if (UNLIKELY(! is_grapheme((U8 *) start,
1165211655
(U8 *) s,
1165311656
(U8 *) PL_bufend,
@@ -11764,9 +11767,9 @@ Perl_scan_str(pTHX_ char *start, int keep_bracketed_quoted, int keep_delims, int
1176411767
s += delim_byte_len;
1176511768
for (;;) {
1176611769
/* extend sv if need be */
11767-
SvGROW(sv, SvCUR(sv) + (PL_bufend - s) + 1);
11770+
char * pv = SvGROW(sv, SvCUR(sv) + (PL_bufend - s) + 1);
1176811771
/* set 'to' to the next character in the sv's string */
11769-
to = SvPVX(sv)+SvCUR(sv);
11772+
to = pv + SvCUR(sv);
1177011773

1177111774
/* read until we run out of string, or we find the closing delimiter */
1177211775
while (s < PL_bufend) {
@@ -11784,18 +11787,22 @@ Perl_scan_str(pTHX_ char *start, int keep_bracketed_quoted, int keep_delims, int
1178411787
* discard those that escape the closing delimiter, just
1178511788
* discard this one */
1178611789
if ( ! keep_bracketed_quoted
11787-
&& ( memEQ(s + 1, open_delim_str, delim_byte_len)
11788-
|| ( PL_multi_open == PL_multi_close
11789-
&& re_reparse && s[1] == '\\')
11790-
|| memEQ(s + 1, close_delim_str, delim_byte_len)))
11791-
{
11790+
&& ((delim_byte_len == 1
11791+
? (s[1] == open_delim_str[0]
11792+
|| s[1] == close_delim_str[0])
11793+
: (memEQ(s + 1, open_delim_str, delim_byte_len)
11794+
|| memEQ(s + 1, close_delim_str, delim_byte_len)))
11795+
|| (PL_multi_open == PL_multi_close
11796+
&& re_reparse && s[1] == '\\'))) {
1179211797
s++;
1179311798
}
1179411799
else /* any other escapes are simply copied straight through */
1179511800
*to++ = *s++;
1179611801
}
1179711802
else if ( s < PL_bufend - (delim_byte_len - 1)
11798-
&& memEQ(s, close_delim_str, delim_byte_len)
11803+
&& (delim_byte_len == 1
11804+
? s[0] == close_delim_str[0]
11805+
: memEQ(s, close_delim_str, delim_byte_len))
1179911806
&& --brackets <= 0)
1180011807
{
1180111808
/* Found unescaped closing delimiter, unnested if we care about
@@ -11824,7 +11831,9 @@ Perl_scan_str(pTHX_ char *start, int keep_bracketed_quoted, int keep_delims, int
1182411831
/* No nesting if open eq close */
1182511832
else if ( PL_multi_open != PL_multi_close
1182611833
&& s < PL_bufend - (delim_byte_len - 1)
11827-
&& memEQ(s, open_delim_str, delim_byte_len))
11834+
&& (delim_byte_len == 1
11835+
? s[0] == open_delim_str[0]
11836+
: memEQ(s, open_delim_str, delim_byte_len)))
1182811837
{
1182911838
brackets++;
1183011839
}

0 commit comments

Comments
 (0)