@@ -101,6 +101,12 @@ static const char ident_var_zero_multi_digit[] = "Numeric variables with more th
101101#define XFAKEEOF 0x40
102102#define XFAKEBRACK 0x80
103103
104+ #define FROM_DOLLAR 1
105+ #define FROM_SNAIL 2
106+ #define FROM_PERCENT 3
107+ #define FROM_IDENT 4
108+ #define FROM_INTERDEPENDMAYBE 5
109+
104110#ifdef USE_UTF8_SCRIPTS
105111# define UTF cBOOL(!IN_BYTES)
106112#else
@@ -4517,7 +4523,11 @@ S_is_existing_identifier(pTHX_ char *s, Size_t len, char sigil, bool is_utf8)
45174523/* This is the one truly awful dwimmer necessary to conflate C and sed. */
45184524
45194525STATIC int
4520- S_intuit_more (pTHX_ char * s , char * e )
4526+ S_intuit_more (pTHX_ char * s , char * e ,
4527+ U8 caller_context , /* Who's calling us? basically an enum */
4528+ char * caller_s , /* If non-NULL, the name of the identifier
4529+ that resulted in this call */
4530+ Size_t caller_length ) /* And the length of that name */
45214531{
45224532 PERL_ARGS_ASSERT_INTUIT_MORE ;
45234533
@@ -4581,6 +4591,49 @@ S_intuit_more(pTHX_ char *s, char *e)
45814591 if (s [0 ] == ']' || s [0 ] == '^' )
45824592 return FALSE;
45834593
4594+ bool under_strict_vars = PL_hints & HINT_STRICT_VARS ;
4595+
4596+ /* If the input is of the form '$foo[...', and there is a $foo scalar and
4597+ * no @foo array, then '...' is more likely to be a character class.
4598+ * (Under 'strict vars', we know at compile time all the accessible
4599+ * variables, so in that case it MUST be a character class.) If the
4600+ * situation is reversed, it is more likely to be (or must be) a
4601+ * subscript. */
4602+ if (caller_context == FROM_DOLLAR ) {
4603+ assert (caller_s );
4604+
4605+ /* See if there is a known scalar for the input identifier */
4606+ bool has_scalar = is_existing_identifier (caller_s , caller_length ,
4607+ '$' , UTF );
4608+
4609+ /* Repeat to see if there is a known array of the given name */
4610+ bool has_array = is_existing_identifier (caller_s , caller_length ,
4611+ '@' , UTF );
4612+
4613+ unsigned int count = has_scalar + has_array ;
4614+
4615+ /* Under strict, we need some variable to be declared. */
4616+ if (under_strict_vars ) {
4617+
4618+ /* If none are, is an error. Return false to stop useless further
4619+ * parsing. */
4620+ if (count == 0 ) {
4621+ return false;
4622+ }
4623+
4624+ /* When just one variable is declared, the construct has to match
4625+ * what the variable is. If it is an array, this must be a
4626+ * subscript which needs further processing; otherwise it is a
4627+ * character class needing nothing further. */
4628+ if (count == 1 ) {
4629+ return has_array ;
4630+ }
4631+
4632+ /* Here have both an array and a scalar with the same name. Drop
4633+ * down to use the heuristics to try to intuit which is meant */
4634+ }
4635+ }
4636+
45844637 /* Find matching ']'. khw: This means any s[1] below is guaranteed to
45854638 * exist */
45864639 const char * const send = (char * ) memchr (s , ']' , e - s );
@@ -5413,7 +5466,9 @@ yyl_dollar(pTHX_ char *s)
54135466 s = skipspace (s );
54145467
54155468 if ( (PL_expect != XREF || PL_oldoldbufptr == PL_last_lop )
5416- && intuit_more (s , PL_bufend )) {
5469+ && intuit_more (s , PL_bufend , FROM_DOLLAR ,
5470+ PL_tokenbuf , strlen (PL_tokenbuf )))
5471+ {
54175472 if (* s == '[' ) {
54185473 PL_tokenbuf [0 ] = '@' ;
54195474 if (ckWARN (WARN_SYNTAX )) {
@@ -6117,7 +6172,9 @@ yyl_percent(pTHX_ char *s)
61176172 PREREF (PERLY_PERCENT_SIGN );
61186173 }
61196174 if ( (PL_expect != XREF || PL_oldoldbufptr == PL_last_lop )
6120- && intuit_more (s , PL_bufend )) {
6175+ && intuit_more (s , PL_bufend , FROM_PERCENT ,
6176+ PL_tokenbuf , strlen (PL_tokenbuf )))
6177+ {
61216178 if (* s == '[' )
61226179 PL_tokenbuf [0 ] = '@' ;
61236180 }
@@ -6739,7 +6796,8 @@ yyl_snail(pTHX_ char *s)
67396796 if (PL_lex_state == LEX_NORMAL || PL_lex_brackets )
67406797 s = skipspace (s );
67416798 if ( (PL_expect != XREF || PL_oldoldbufptr == PL_last_lop )
6742- && intuit_more (s , PL_bufend ))
6799+ && intuit_more (s , PL_bufend , FROM_SNAIL ,
6800+ PL_tokenbuf , strlen (PL_tokenbuf )))
67436801 {
67446802 if (* s == '{' )
67456803 PL_tokenbuf [0 ] = '%' ;
@@ -9822,7 +9880,8 @@ Perl_yylex(pTHX)
98229880 return yylex ();
98239881
98249882 case LEX_INTERPENDMAYBE :
9825- if (intuit_more (PL_bufptr , PL_bufend )) {
9883+ if (intuit_more (PL_bufptr , PL_bufend , FROM_INTERDEPENDMAYBE , NULL , 0 ))
9884+ {
98269885 PL_lex_state = LEX_INTERPNORMAL ; /* false alarm, more expr */
98279886 break ;
98289887 }
@@ -10636,7 +10695,7 @@ S_scan_ident(pTHX_ char *s, char *dest, STRLEN destlen, I32 ck_uni)
1063610695 }
1063710696 else if ( PL_lex_state == LEX_INTERPNORMAL
1063810697 && !PL_lex_brackets
10639- && !intuit_more (s , PL_bufend ))
10698+ && !intuit_more (s , PL_bufend , FROM_IDENT , NULL , 0 ))
1064010699 PL_lex_state = LEX_INTERPEND ;
1064110700 return s ;
1064210701}
0 commit comments