Skip to content

Commit c0ca4c8

Browse files
committed
S_scan_ident: Char in middle need not be IDStart
This fixes a bug in this function, in which it required the second character in an identifier to be IDStart, instead of IDCont. This hasn't been caught because most identifiers are ASCII, and generally for the purposes of this function in the ASCII range, all \w characters can be IDStart.
1 parent 3ee55a2 commit c0ca4c8

File tree

2 files changed

+26
-14
lines changed

2 files changed

+26
-14
lines changed

t/comp/parser.t

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ BEGIN {
88
chdir 't' if -d 't';
99
}
1010

11-
print "1..192\n";
11+
print "1..193\n";
1212

1313
sub failed {
1414
my ($got, $expected, $name) = @_;
@@ -673,6 +673,18 @@ is $@, "", 'substr keys assignment';
673673
is ($@, "", "Handles all numeric package component after ::");
674674
}
675675
676+
{
677+
my $expected = "this is the way the identifier ends; not with a bang";
678+
my $result;
679+
eval "use utf8; my \$e\x{1df8}claire = '$expected'; \$result = \${e\x{1df8}claire}";
680+
if ($@) {
681+
failed($@, "no error", "Didn't crash");
682+
}
683+
else {
684+
is ($result, $expected, "Parser can handle a continuation as 2nd char");
685+
}
686+
}
687+
676688
# Add new tests HERE (above this line)
677689
678690
# bug #74022: Loop on characters in \p{OtherIDContinue}

toke.c

Lines changed: 13 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -10933,26 +10933,26 @@ S_scan_ident(pTHX_ char *s, char *dest, char *dest_end, U32 flags)
1093310933
* be conflated with a control character identifier. */
1093410934
if (advance) {
1093510935

10936-
/* Now parse the normal identifier.
10937-
*
10938-
* khw: The code below is buggy because we already have parsed
10939-
* and copied the first character of it. The next character
10940-
* could be any IDCONT one, not just an IDFIRST */
10936+
/* Now parse the normal identifier. But note, we already have
10937+
* parsed and copied the first character of it. That means we
10938+
* are jumping into the middle; so tell that to parse_ident.
10939+
* */
1094110940
d += advance;
1094210941
s = parse_ident(s, PL_bufend, &d, e, is_utf8,
10943-
(ALLOW_PACKAGE | CHECK_DOLLAR));
10942+
(ALLOW_PACKAGE|CHECK_DOLLAR)|IDCONT_first_OK);
1094410943
}
1094510944
else { /* caret word: ${^Foo} ${^CAPTURE[0]} */
1094610945

1094710946
/* Now parse the control character identifier. Again, we have
10948-
* already copied the first character. */
10947+
* already copied the first character. This routine is
10948+
* sufficiently chummy with parse_ident to know that when we
10949+
* say the string isn't UTF-8, it will do the right thing in
10950+
* looking only for ASCII \w characters as identifier
10951+
* continuations */
1094910952
d++;
10950-
while (isWORDCHAR(*s) && d < e) {
10951-
*d++ = *s++;
10952-
}
10953-
if (d >= e)
10954-
croak("%s", ident_too_long);
10955-
*d = '\0';
10953+
s = parse_ident(s, PL_bufend, &d, e,
10954+
false, /* Don't allow UTF-8 */
10955+
IDCONT_first_OK);
1095610956
}
1095710957

1095810958
tmp_copline = CopLINE(PL_curcop);

0 commit comments

Comments
 (0)