@@ -101,6 +101,12 @@ static const char ident_var_zero_multi_digit[] = "Numeric variables with more th
101101#define XFAKEEOF 0x40
102102#define XFAKEBRACK 0x80
103103
104+ #define FROM_DOLLAR 1
105+ #define FROM_SNAIL 2
106+ #define FROM_PERCENT 3
107+ #define FROM_IDENT 4
108+ #define FROM_INTERDEPENDMAYBE 5
109+
104110#ifdef USE_UTF8_SCRIPTS
105111# define UTF cBOOL(!IN_BYTES)
106112#else
@@ -4514,7 +4520,11 @@ S_is_existing_identifier(pTHX_ char *s, Size_t len, char sigil, bool is_utf8)
45144520/* This is the one truly awful dwimmer necessary to conflate C and sed. */
45154521
45164522STATIC int
4517- S_intuit_more (pTHX_ char * s , char * e )
4523+ S_intuit_more (pTHX_ char * s , char * e ,
4524+ U8 caller_context , /* Who's calling us? basically an enum */
4525+ char * caller_s , /* If non-NULL, the name of the identifier
4526+ that resulted in this call */
4527+ Size_t caller_length ) /* And the length of that name */
45184528{
45194529 PERL_ARGS_ASSERT_INTUIT_MORE ;
45204530
@@ -4585,28 +4595,51 @@ S_intuit_more(pTHX_ char *s, char *e)
45854595 if (s [0 ] == ']' || s [0 ] == '^' )
45864596 return FALSE;
45874597
4588- /* khw: If the context of this call is $foo[...], we may be able to avoid
4589- * the heuristics below. The possibilities are:
4590- * strict @foo $foo
4591- * vars? exists exists
4592- * y n n This is an error; return false now
4593- * y n y must be a a charclass
4594- * y y n must be a a subscript
4595- * y y y ambiguous; do heuristics below
4596- * n n n ambiguous; do heuristics below
4597- * n n y ambiguous; do heuristics below, but I
4598- * wonder if the initial bias should be a
4599- * little towards charclass
4600- * n y n ambiguous; do heuristics below, but I
4601- * wonder if the initial bias should be a
4602- * little towards subscript
4603- * n y y ambiguous; do heuristics below
4604- */
4598+ bool under_strict_vars = PL_hints & HINT_STRICT_VARS ;
4599+
4600+ /* If the input is of the form '$foo[...', and there is a $foo scalar and
4601+ * no @foo array, then '...' is more likely to be a character class.
4602+ * (Under 'strict vars', we know at compile time all the accessible
4603+ * variables, so in that case it MUST be a character class.) If the
4604+ * situation is reversed, it is more likely to be (or must be) a
4605+ * subscript. */
4606+ if (caller_context == FROM_DOLLAR ) {
4607+ assert (caller_s );
4608+
4609+ /* See if there is a known scalar for the input identifier */
4610+ bool has_scalar = is_existing_identifier (caller_s , caller_length ,
4611+ '$' , UTF );
4612+
4613+ /* Repeat to see if there is a known array of the given name */
4614+ bool has_array = is_existing_identifier (caller_s , caller_length ,
4615+ '@' , UTF );
4616+
4617+ unsigned int count = has_scalar + has_array ;
4618+
4619+ /* Under strict, we need some variable to be declared. */
4620+ if (under_strict_vars ) {
4621+
4622+ /* If none are, is an error. Return false to stop useless further
4623+ * parsing. */
4624+ if (count == 0 ) {
4625+ return false;
4626+ }
4627+
4628+ /* When just one variable is declared, the construct has to match
4629+ * what the variable is. If it is an array, this must be a
4630+ * subscript which needs further processing; otherwise it is a
4631+ * character class needing nothing further. */
4632+ if (count == 1 ) {
4633+ return has_array ;
4634+ }
46054635
4606- /* Find matching ']'. khw: Actually it finds the next ']' and assumes it
4607- * matches the '['. In order to account for the possibility of the ']'
4608- * being inside the scope of \Q or preceded by an even number of
4609- * backslashes, this should be rewritten */
4636+ /* Here have both an array and a scalar with the same name. Drop
4637+ * down to use the heuristics to try to intuit which is meant */
4638+ }
4639+ }
4640+
4641+ /* Find matching ']'. khw: This means any s[1] below is guaranteed to
4642+ * exist */
46104643 const char * const send = (char * ) memchr (s , ']' , e - s );
46114644 if (! send ) /* has to be an expression */
46124645 return TRUE;
@@ -5591,7 +5624,9 @@ yyl_dollar(pTHX_ char *s)
55915624 s = skipspace (s );
55925625
55935626 if ( (PL_expect != XREF || PL_oldoldbufptr == PL_last_lop )
5594- && intuit_more (s , PL_bufend )) {
5627+ && intuit_more (s , PL_bufend , FROM_DOLLAR ,
5628+ PL_tokenbuf , strlen (PL_tokenbuf )))
5629+ {
55955630 if (* s == '[' ) {
55965631 PL_tokenbuf [0 ] = '@' ;
55975632 if (ckWARN (WARN_SYNTAX )) {
@@ -6295,7 +6330,9 @@ yyl_percent(pTHX_ char *s)
62956330 PREREF (PERLY_PERCENT_SIGN );
62966331 }
62976332 if ( (PL_expect != XREF || PL_oldoldbufptr == PL_last_lop )
6298- && intuit_more (s , PL_bufend )) {
6333+ && intuit_more (s , PL_bufend , FROM_PERCENT ,
6334+ PL_tokenbuf , strlen (PL_tokenbuf )))
6335+ {
62996336 if (* s == '[' )
63006337 PL_tokenbuf [0 ] = '@' ;
63016338 }
@@ -6918,7 +6955,8 @@ yyl_snail(pTHX_ char *s)
69186955 if (PL_lex_state == LEX_NORMAL || PL_lex_brackets )
69196956 s = skipspace (s );
69206957 if ( (PL_expect != XREF || PL_oldoldbufptr == PL_last_lop )
6921- && intuit_more (s , PL_bufend ))
6958+ && intuit_more (s , PL_bufend , FROM_SNAIL ,
6959+ PL_tokenbuf , strlen (PL_tokenbuf )))
69226960 {
69236961 if (* s == '{' )
69246962 PL_tokenbuf [0 ] = '%' ;
@@ -10004,7 +10042,8 @@ Perl_yylex(pTHX)
1000410042 return yylex ();
1000510043
1000610044 case LEX_INTERPENDMAYBE :
10007- if (intuit_more (PL_bufptr , PL_bufend )) {
10045+ if (intuit_more (PL_bufptr , PL_bufend , FROM_INTERDEPENDMAYBE , NULL , 0 ))
10046+ {
1000810047 PL_lex_state = LEX_INTERPNORMAL ; /* false alarm, more expr */
1000910048 break ;
1001010049 }
@@ -10818,7 +10857,7 @@ S_scan_ident(pTHX_ char *s, char *dest, char *dest_end, bool chk_unary)
1081810857 }
1081910858 else if ( PL_lex_state == LEX_INTERPNORMAL
1082010859 && !PL_lex_brackets
10821- && !intuit_more (s , PL_bufend ))
10860+ && !intuit_more (s , PL_bufend , FROM_IDENT , NULL , 0 ))
1082210861 PL_lex_state = LEX_INTERPEND ;
1082310862 return s ;
1082410863}
0 commit comments