Skip to content

Commit 5ea209f

Browse files
committed
toke.c: Avoid UTF8SKIPs
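The isID(FIRST|CONT)_lazy_if_safe macros now return the byte length of the character they matched (0 when there is no match). Use that return value to advance the pointer instead of re-deriving it with UTF8SKIP.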
1 parent 08c5ab5 commit 5ea209f

File tree

1 file changed

+51
-30
lines changed

1 file changed

+51
-30
lines changed

toke.c

Lines changed: 51 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -2088,8 +2088,10 @@ S_check_uni(pTHX)
20882088
while (isSPACE(*PL_last_uni))
20892089
PL_last_uni++;
20902090
s = PL_last_uni;
2091-
while (isWORDCHAR_lazy_if_safe(s, PL_bufend, UTF) || *s == '-')
2092-
s += UTF ? UTF8SKIP(s) : 1;
2091+
Size_t advance;
2092+
while ( (advance = isWORDCHAR_lazy_if_safe(s, PL_bufend, UTF))
2093+
|| (advance = (*s == '-')))
2094+
s += advance;
20932095
if (s < PL_bufptr && memchr(s, '(', PL_bufptr - s))
20942096
return;
20952097

@@ -5193,10 +5195,11 @@ S_check_scalar_slice(pTHX_ char *s)
51935195
{
51945196
return;
51955197
}
5196-
while ( isWORDCHAR_lazy_if_safe(s, PL_bufend, UTF)
5197-
|| (*s && memCHRs(" \t$#+-'\"", *s)))
5198+
Size_t advance;
5199+
while ( (advance = isWORDCHAR_lazy_if_safe(s, PL_bufend, UTF))
5200+
|| (advance = (*s && memCHRs(" \t$#+-'\"", *s))))
51985201
{
5199-
s += UTF ? UTF8SKIP(s) : 1;
5202+
s += advance;
52005203
}
52015204
if (*s == '}' || *s == ']')
52025205
pl_yylval.ival = OPpSLICEWARNING;
@@ -5402,8 +5405,11 @@ yyl_dollar(pTHX_ char *s)
54025405
while (t < PL_bufend && *t == ' ') t++;
54035406

54045407
/* strip off the name of the var */
5405-
while (isWORDCHAR_lazy_if_safe(t, PL_bufend, UTF))
5406-
t += UTF ? UTF8SKIP(t) : 1;
5408+
Size_t advance;
5409+
while ((advance = (isWORDCHAR_lazy_if_safe(t,
5410+
PL_bufend,
5411+
UTF))))
5412+
t += advance;
54075413
/* consumed a varname */
54085414
} else if (isDIGIT(*t)) {
54095415
/* deal with hex constants like 0x11 */
@@ -6407,6 +6413,7 @@ yyl_leftcurly(pTHX_ char *s, const U8 formbrack)
64076413
* GSAR 97-07-21
64086414
*/
64096415
t = s;
6416+
Size_t advance;
64106417
if (*s == '\'' || *s == '"' || *s == '`') {
64116418
/* common case: get past first string, handling escapes */
64126419
for (t++; t < PL_bufend && *t != *s;)
@@ -6455,20 +6462,24 @@ yyl_leftcurly(pTHX_ char *s, const U8 formbrack)
64556462
}
64566463
t++;
64576464
}
6458-
else
6465+
else {
64596466
/* skip plain q word */
6460-
while ( t < PL_bufend
6461-
&& isWORDCHAR_lazy_if_safe(t, PL_bufend, UTF))
6467+
while ( t < PL_bufend
6468+
&& (advance = isWORDCHAR_lazy_if_safe(t,
6469+
PL_bufend,
6470+
UTF)))
64626471
{
6463-
t += UTF ? UTF8SKIP(t) : 1;
6472+
t += advance;
64646473
}
6474+
}
64656475
}
6466-
else if (isWORDCHAR_lazy_if_safe(t, PL_bufend, UTF)) {
6467-
t += UTF ? UTF8SKIP(t) : 1;
6476+
else if ((advance = isWORDCHAR_lazy_if_safe(t, PL_bufend, UTF))) {
6477+
t += advance;
64686478
while ( t < PL_bufend
6469-
&& isWORDCHAR_lazy_if_safe(t, PL_bufend, UTF))
6479+
&& (advance = isWORDCHAR_lazy_if_safe(t, PL_bufend,
6480+
UTF)))
64706481
{
6471-
t += UTF ? UTF8SKIP(t) : 1;
6482+
t += advance;
64726483
}
64736484
}
64746485
while (t < PL_bufend && isSPACE(*t))
@@ -10125,11 +10136,12 @@ S_checkcomma(pTHX_ const char *s, const char *name, const char *what)
1012510136
s++;
1012610137
while (s < PL_bufend && isSPACE(*s))
1012710138
s++;
10128-
if (isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)) {
10139+
Size_t advance;
10140+
if ((advance = isIDFIRST_lazy_if_safe(s, PL_bufend, UTF))) {
1012910141
const char * const w = s;
10130-
s += UTF ? UTF8SKIP(s) : 1;
10131-
while (isWORDCHAR_lazy_if_safe(s, PL_bufend, UTF))
10132-
s += UTF ? UTF8SKIP(s) : 1;
10142+
s += advance;
10143+
while ((advance = isWORDCHAR_lazy_if_safe(s, PL_bufend, UTF)))
10144+
s += advance;
1013310145
while (s < PL_bufend && isSPACE(*s))
1013410146
s++;
1013510147
if (*s == ',') {
@@ -10281,15 +10293,18 @@ S_parse_ident(pTHX_ char **s, char **d, char * const e, int allow_package,
1028110293
while (*s < PL_bufend) {
1028210294
if (*d >= e)
1028310295
croak("%s", ident_too_long);
10284-
if (is_utf8 && isIDFIRST_utf8_safe(*s, PL_bufend)) {
10296+
Size_t advance;
10297+
if (is_utf8 && (advance = isIDFIRST_utf8_safe(*s, PL_bufend))) {
1028510298
/* The UTF-8 case must come first, otherwise things
1028610299
* like c\N{COMBINING TILDE} would start failing, as the
1028710300
* isWORDCHAR_A case below would gobble the 'c' up.
1028810301
*/
1028910302

10290-
char *t = *s + UTF8SKIP(*s);
10291-
while (isIDCONT_utf8_safe((const U8*) t, (const U8*) PL_bufend)) {
10292-
t += UTF8SKIP(t);
10303+
char *t = *s + advance;
10304+
while ((advance = isIDCONT_utf8_safe((const U8*) t,
10305+
(const U8*) PL_bufend)))
10306+
{
10307+
t += advance;
1029310308
}
1029410309
if (*d + (t - *s) > e)
1029510310
croak("%s", ident_too_long);
@@ -10496,11 +10511,12 @@ S_scan_ident(pTHX_ char *s, char *dest, STRLEN destlen, I32 ck_uni)
1049610511
/* note we have to check for a normal identifier first,
1049710512
* as it handles utf8 symbols, and only after that has
1049810513
* been ruled out can we look at the caret words */
10499-
if (isIDFIRST_lazy_if_safe(d, e, is_utf8) ) {
10514+
Size_t advance;
10515+
if ((advance = isIDFIRST_lazy_if_safe(d, e, is_utf8) )) {
1050010516
/* if it starts as a valid identifier, assume that it is one.
1050110517
(the later check for } being at the expected point will trap
1050210518
cases where this doesn't pan out.) */
10503-
d += is_utf8 ? UTF8SKIP(d) : 1;
10519+
d += advance;
1050410520
parse_ident(&s, &d, e, 1, is_utf8, TRUE);
1050510521
*d = '\0';
1050610522
}
@@ -10998,8 +11014,9 @@ S_scan_heredoc(pTHX_ char *s)
1099811014

1099911015
peek = s;
1100011016

11001-
while (isWORDCHAR_lazy_if_safe(peek, PL_bufend, UTF)) {
11002-
peek += UTF ? UTF8SKIP(peek) : 1;
11017+
Size_t advance;
11018+
while ((advance = isWORDCHAR_lazy_if_safe(peek, PL_bufend, UTF))) {
11019+
peek += advance;
1100311020
}
1100411021

1100511022
len = (peek - s >= e - d) ? (e - d) : (peek - s);
@@ -11442,9 +11459,13 @@ S_scan_inputsymbol(pTHX_ char *start)
1144211459
if (*d == '$' && d[1]) d++;
1144311460

1144411461
/* allow <Pkg'VALUE> or <Pkg::VALUE> */
11445-
while (isWORDCHAR_lazy_if_safe(d, e, UTF) || *d == ':'
11446-
|| (*d == '\'' && FEATURE_APOS_AS_NAME_SEP_IS_ENABLED)) {
11447-
d += UTF ? UTF8SKIP(d) : 1;
11462+
Size_t advance;
11463+
while ( (advance = isWORDCHAR_lazy_if_safe(d, e, UTF))
11464+
|| (advance = ( *d == ':'
11465+
|| ( *d == '\''
11466+
&& FEATURE_APOS_AS_NAME_SEP_IS_ENABLED))))
11467+
{
11468+
d += advance;
1144811469
}
1144911470

1145011471
/* If we've tried to read what we allow filehandles to look like, and

0 commit comments

Comments
 (0)