@@ -101,6 +101,12 @@ static const char ident_var_zero_multi_digit[] = "Numeric variables with more th
101101#define XFAKEEOF 0x40
102102#define XFAKEBRACK 0x80
103103
104+ #define FROM_DOLLAR 1
105+ #define FROM_SNAIL 2
106+ #define FROM_PERCENT 3
107+ #define FROM_IDENT 4
108+ #define FROM_INTERDEPENDMAYBE 5
109+
104110#ifdef USE_UTF8_SCRIPTS
105111# define UTF cBOOL(!IN_BYTES)
106112#else
@@ -4517,7 +4523,8 @@ S_is_existing_identifier(pTHX_ char *s, Size_t len, char sigil, bool is_utf8)
45174523/* This is the one truly awful dwimmer necessary to conflate C and sed. */
45184524
45194525STATIC int
4520- S_intuit_more (pTHX_ char * s , char * e )
4526+ S_intuit_more (pTHX_ char * s , char * e ,
4527+ U8 caller_context , char * caller_s , Size_t caller_length )
45214528{
45224529 PERL_ARGS_ASSERT_INTUIT_MORE ;
45234530
@@ -4581,6 +4588,49 @@ S_intuit_more(pTHX_ char *s, char *e)
45814588 if (s [0 ] == ']' || s [0 ] == '^' )
45824589 return FALSE;
45834590
4591+ bool under_strict_vars = PL_hints & HINT_STRICT_VARS ;
4592+
4593+ /* If the input is of the form '$foo[...', and there is a $foo scalar and
4594+ * no @foo array, then '...' is more likely to be a character class.
4595+ * (Under 'strict vars', we know at compile time all the accessible
4596+ * variables, so in that case it MUST be a character class.) If the
4597+ * situation is reversed, it is more likely or must be a subscript */
4598+ if (caller_context == FROM_DOLLAR ) {
4599+ assert (caller_s );
4600+
4601+ /* See if there is a known scalar for what our caller is asking about.
4602+ * */
4603+ bool has_scalar = is_existing_identifier (caller_s , caller_length ,
4604+ '$' , UTF );
4605+
4606+ /* Repeat to see if there is a known array of the given name */
4607+ bool has_array = is_existing_identifier (caller_s , caller_length ,
4608+ '@' , UTF );
4609+
4610+ unsigned int count = has_scalar + has_array ;
4611+
4612+ /* Under strict, we need some variable to be declared. */
4613+ if (under_strict_vars ) {
4614+
4615+ /* If none are, is an error, return false to stop useless further
4616+ * parsing. */
4617+ if (count == 0 ) {
4618+ return false;
4619+ }
4620+
4621+ /* When just one variable is declared, the construct has to match
4622+ * what the variable is. If it is an array, this must be a
4623+ * subscript which needs further processing; otherwise it is a
4624+ * character class needing nothing further. */
4625+ if (count == 1 ) {
4626+ return has_array ;
4627+ }
4628+
4629+ /* Here have both an array and a scalar with the same name. Drop
4630+ * down to use the heuristics to try to intuit which is meant */
4631+ }
4632+ }
4633+
45844634 /* Find matching ']'. khw: This means any s[1] below is guaranteed to
45854635 * exist */
45864636 const char * const send = (char * ) memchr (s , ']' , e - s );
@@ -5413,7 +5463,9 @@ yyl_dollar(pTHX_ char *s)
54135463 s = skipspace (s );
54145464
54155465 if ( (PL_expect != XREF || PL_oldoldbufptr == PL_last_lop )
5416- && intuit_more (s , PL_bufend )) {
5466+ && intuit_more (s , PL_bufend , FROM_DOLLAR ,
5467+ PL_tokenbuf , strlen (PL_tokenbuf )))
5468+ {
54175469 if (* s == '[' ) {
54185470 PL_tokenbuf [0 ] = '@' ;
54195471 if (ckWARN (WARN_SYNTAX )) {
@@ -6117,7 +6169,9 @@ yyl_percent(pTHX_ char *s)
61176169 PREREF (PERLY_PERCENT_SIGN );
61186170 }
61196171 if ( (PL_expect != XREF || PL_oldoldbufptr == PL_last_lop )
6120- && intuit_more (s , PL_bufend )) {
6172+ && intuit_more (s , PL_bufend , FROM_PERCENT ,
6173+ PL_tokenbuf , strlen (PL_tokenbuf )))
6174+ {
61216175 if (* s == '[' )
61226176 PL_tokenbuf [0 ] = '@' ;
61236177 }
@@ -6739,7 +6793,8 @@ yyl_snail(pTHX_ char *s)
67396793 if (PL_lex_state == LEX_NORMAL || PL_lex_brackets )
67406794 s = skipspace (s );
67416795 if ( (PL_expect != XREF || PL_oldoldbufptr == PL_last_lop )
6742- && intuit_more (s , PL_bufend ))
6796+ && intuit_more (s , PL_bufend , FROM_SNAIL ,
6797+ PL_tokenbuf , strlen (PL_tokenbuf )))
67436798 {
67446799 if (* s == '{' )
67456800 PL_tokenbuf [0 ] = '%' ;
@@ -9822,7 +9877,8 @@ Perl_yylex(pTHX)
98229877 return yylex ();
98239878
98249879 case LEX_INTERPENDMAYBE :
9825- if (intuit_more (PL_bufptr , PL_bufend )) {
9880+ if (intuit_more (PL_bufptr , PL_bufend , FROM_INTERDEPENDMAYBE , NULL , 0 ))
9881+ {
98269882 PL_lex_state = LEX_INTERPNORMAL ; /* false alarm, more expr */
98279883 break ;
98289884 }
@@ -10636,7 +10692,7 @@ S_scan_ident(pTHX_ char *s, char *dest, STRLEN destlen, I32 ck_uni)
1063610692 }
1063710693 else if ( PL_lex_state == LEX_INTERPNORMAL
1063810694 && !PL_lex_brackets
10639- && !intuit_more (s , PL_bufend ))
10695+ && !intuit_more (s , PL_bufend , FROM_IDENT , NULL , 0 ))
1064010696 PL_lex_state = LEX_INTERPEND ;
1064110697 return s ;
1064210698}
0 commit comments