Skip to content

Commit cbe1dc5

Browse files
committed
intuit_more: 'use strict' allows much better handling
Most code these days runs under 'use strict'. That allows us to resolve ambiguity without resorting to heuristics in far more cases than before. This commit adds parameters to intuit_more() that tell it the context it is being called from, along with the name of the identifier the caller is asking about. When the call is to resolve what $foo[...] is supposed to mean, we can look up foo to see whether it is an array or a scalar. If only an array exists, the "..." must be a subscript; if only a scalar exists, it must be a character class. Only if there is both a $foo and an @foo is there ambiguity, in which case we fall back to the heuristics.
1 parent 7fba0cf commit cbe1dc5

File tree

4 files changed

+68
-9
lines changed

4 files changed

+68
-9
lines changed

embed.fnc

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6082,7 +6082,10 @@ S |int |intuit_method |NN char *start \
60826082
|NULLOK SV *ioname \
60836083
|NULLOK NOCHECK CV *cv
60846084
S |int |intuit_more |NN char *s \
6085-
|NN char *e
6085+
|NN char *e \
6086+
|U8 caller_context \
6087+
|NULLOK char *caller_s \
6088+
|Size_t caller_length
60866089
S |bool |is_existing_identifier \
60876090
|NN char *s \
60886091
|Size_t len \

embed.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1684,7 +1684,7 @@
16841684
# define get_and_check_backslash_N_name_wrapper(a,b) S_get_and_check_backslash_N_name_wrapper(aTHX_ a,b)
16851685
# define incline(a,b) S_incline(aTHX_ a,b)
16861686
# define intuit_method(a,b,c) S_intuit_method(aTHX_ a,b,c)
1687-
# define intuit_more(a,b) S_intuit_more(aTHX_ a,b)
1687+
# define intuit_more(a,b,c,d,e) S_intuit_more(aTHX_ a,b,c,d,e)
16881688
# define is_existing_identifier(a,b,c,d) S_is_existing_identifier(aTHX_ a,b,c,d)
16891689
# define lop(a,b,c,d) S_lop(aTHX_ a,b,c,d)
16901690
# define missingterm(a,b) S_missingterm(aTHX_ a,b)

proto.h

Lines changed: 1 addition & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

toke.c

Lines changed: 62 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -101,6 +101,12 @@ static const char ident_var_zero_multi_digit[] = "Numeric variables with more th
101101
#define XFAKEEOF 0x40
102102
#define XFAKEBRACK 0x80
103103

104+
#define FROM_DOLLAR 1
105+
#define FROM_SNAIL 2
106+
#define FROM_PERCENT 3
107+
#define FROM_IDENT 4
108+
#define FROM_INTERDEPENDMAYBE 5
109+
104110
#ifdef USE_UTF8_SCRIPTS
105111
# define UTF cBOOL(!IN_BYTES)
106112
#else
@@ -4517,7 +4523,8 @@ S_is_existing_identifier(pTHX_ char *s, Size_t len, char sigil, bool is_utf8)
45174523
/* This is the one truly awful dwimmer necessary to conflate C and sed. */
45184524

45194525
STATIC int
4520-
S_intuit_more(pTHX_ char *s, char *e)
4526+
S_intuit_more(pTHX_ char *s, char *e,
4527+
U8 caller_context, char * caller_s, Size_t caller_length)
45214528
{
45224529
PERL_ARGS_ASSERT_INTUIT_MORE;
45234530

@@ -4581,6 +4588,49 @@ S_intuit_more(pTHX_ char *s, char *e)
45814588
if (s[0] == ']' || s[0] == '^')
45824589
return FALSE;
45834590

4591+
bool under_strict_vars = PL_hints & HINT_STRICT_VARS;
4592+
4593+
/* If the input is of the form '$foo[...', and there is a $foo scalar and
4594+
* no @foo array, then '...' is more likely to be a character class.
4595+
* (Under 'strict vars', we know at compile time all the accessible
4596+
* variables, so in that case it MUST be a character class.) If the
4597+
* situation is reversed, it is more likely or must be a subscript */
4598+
if (caller_context == FROM_DOLLAR) {
4599+
assert (caller_s);
4600+
4601+
/* See if there is a known scalar for what our caller is asking about.
4602+
* */
4603+
bool has_scalar = is_existing_identifier(caller_s, caller_length,
4604+
'$', UTF);
4605+
4606+
/* Repeat to see if there is a known array of the given name */
4607+
bool has_array = is_existing_identifier(caller_s, caller_length,
4608+
'@', UTF);
4609+
4610+
unsigned int count = has_scalar + has_array;
4611+
4612+
/* Under strict, we need some variable to be declared. */
4613+
if (under_strict_vars) {
4614+
4615+
/* If none is declared, that is an error; return false to stop useless
4616+
* further parsing. */
4617+
if (count == 0) {
4618+
return false;
4619+
}
4620+
4621+
/* When just one variable is declared, the construct has to match
4622+
* what the variable is. If it is an array, this must be a
4623+
* subscript which needs further processing; otherwise it is a
4624+
* character class needing nothing further. */
4625+
if (count == 1) {
4626+
return has_array;
4627+
}
4628+
4629+
/* Here have both an array and a scalar with the same name. Drop
4630+
* down to use the heuristics to try to intuit which is meant */
4631+
}
4632+
}
4633+
45844634
/* Find matching ']'. khw: This means any s[1] below is guaranteed to
45854635
* exist */
45864636
const char * const send = (char *) memchr(s, ']', e - s);
@@ -5413,7 +5463,9 @@ yyl_dollar(pTHX_ char *s)
54135463
s = skipspace(s);
54145464

54155465
if ( (PL_expect != XREF || PL_oldoldbufptr == PL_last_lop)
5416-
&& intuit_more(s, PL_bufend)) {
5466+
&& intuit_more(s, PL_bufend, FROM_DOLLAR,
5467+
PL_tokenbuf, strlen(PL_tokenbuf)))
5468+
{
54175469
if (*s == '[') {
54185470
PL_tokenbuf[0] = '@';
54195471
if (ckWARN(WARN_SYNTAX)) {
@@ -6117,7 +6169,9 @@ yyl_percent(pTHX_ char *s)
61176169
PREREF(PERLY_PERCENT_SIGN);
61186170
}
61196171
if ( (PL_expect != XREF || PL_oldoldbufptr == PL_last_lop)
6120-
&& intuit_more(s, PL_bufend)) {
6172+
&& intuit_more(s, PL_bufend, FROM_PERCENT,
6173+
PL_tokenbuf, strlen(PL_tokenbuf)))
6174+
{
61216175
if (*s == '[')
61226176
PL_tokenbuf[0] = '@';
61236177
}
@@ -6739,7 +6793,8 @@ yyl_snail(pTHX_ char *s)
67396793
if (PL_lex_state == LEX_NORMAL || PL_lex_brackets)
67406794
s = skipspace(s);
67416795
if ( (PL_expect != XREF || PL_oldoldbufptr == PL_last_lop)
6742-
&& intuit_more(s, PL_bufend))
6796+
&& intuit_more(s, PL_bufend, FROM_SNAIL,
6797+
PL_tokenbuf, strlen(PL_tokenbuf)))
67436798
{
67446799
if (*s == '{')
67456800
PL_tokenbuf[0] = '%';
@@ -9822,7 +9877,8 @@ Perl_yylex(pTHX)
98229877
return yylex();
98239878

98249879
case LEX_INTERPENDMAYBE:
9825-
if (intuit_more(PL_bufptr, PL_bufend)) {
9880+
if (intuit_more(PL_bufptr, PL_bufend, FROM_INTERDEPENDMAYBE, NULL, 0))
9881+
{
98269882
PL_lex_state = LEX_INTERPNORMAL; /* false alarm, more expr */
98279883
break;
98289884
}
@@ -10636,7 +10692,7 @@ S_scan_ident(pTHX_ char *s, char *dest, STRLEN destlen, I32 ck_uni)
1063610692
}
1063710693
else if ( PL_lex_state == LEX_INTERPNORMAL
1063810694
&& !PL_lex_brackets
10639-
&& !intuit_more(s, PL_bufend))
10695+
&& !intuit_more(s, PL_bufend, FROM_IDENT, NULL, 0))
1064010696
PL_lex_state = LEX_INTERPEND;
1064110697
return s;
1064210698
}

0 commit comments

Comments
 (0)