@@ -101,6 +101,12 @@ static const char ident_var_zero_multi_digit[] = "Numeric variables with more th
101101#define XFAKEEOF 0x40
102102#define XFAKEBRACK 0x80
103103
104+ #define FROM_DOLLAR 1
105+ #define FROM_SNAIL 2
106+ #define FROM_PERCENT 3
107+ #define FROM_IDENT 4
108+ #define FROM_INTERDEPENDMAYBE 5
109+
104110#ifdef USE_UTF8_SCRIPTS
105111# define UTF cBOOL(!IN_BYTES)
106112#else
@@ -4547,7 +4553,7 @@ S_is_existing_identifier(pTHX_ char *s, char *e, char sigil, bool is_utf8)
45474553/* This is the one truly awful dwimmer necessary to conflate C and sed. */
45484554
45494555STATIC int
4550- S_intuit_more (pTHX_ char * s , char * e )
4556+ S_intuit_more (pTHX_ char * s , char * e , U8 caller_context )
45514557{
45524558 PERL_ARGS_ASSERT_INTUIT_MORE ;
45534559
@@ -4608,23 +4614,47 @@ S_intuit_more(pTHX_ char *s, char *e)
46084614 if (s [0 ] == ']' || s [0 ] == '^' )
46094615 return FALSE;
46104616
4611- /* khw: If the context of this call is $foo[...], we may be able to avoid
4612- * the heuristics below. The possibilities are:
4613- * strict @foo $foo
4614- * vars? exists exists
4615- * y n n This is an error; return false now
4616- * y n y must be a a charclass
4617- * y y n must be a a subscript
4618- * y y y ambiguous; do heuristics below
4619- * n n n ambiguous; do heuristics below
4620- * n n y ambiguous; do heuristics below, but I
4621- * wonder if the initial bias should be a
4622- * little towards charclass
4623- * n y n ambiguous; do heuristics below, but I
4624- * wonder if the initial bias should be a
4625- * little towards subscript
4626- * n y y ambiguous; do heuristics below
4627- */
4617+
4618+ /* If the input is of the form '$foo[...', and there is a $foo scalar and
4619+ * no @foo array, then '...' is more likely to be a character class.
4620+ * (Under 'strict vars', we know at compile time all the accessible
4621+ * variables, so in that case it MUST be a character class.) If the
4622+ * situation is reversed, it is more likely or must be a subscript */
4623+ if ( caller_context == FROM_DOLLAR
4624+ || (caller_context == FROM_INTERDEPENDMAYBE && PL_tokenbuf [0 ] == '@' ))
4625+ {
4626+ char * e = PL_tokenbuf + sizeof (PL_tokenbuf ) + 1 ;
4627+
4628+ /* See if there is a known scalar for what our caller is asking about.
4629+ * */
4630+ bool has_scalar = is_existing_identifier (PL_tokenbuf , e , '$' , UTF );
4631+
4632+ /* Repeat to see if there is a known array of the given name */
4633+ bool has_array = is_existing_identifier (PL_tokenbuf , e , '@' , UTF );
4634+
4635+ unsigned int count = has_scalar + has_array ;
4636+
4637+ /* Under strict, we need some variable to be declared. */
4638+ if (PL_hints & HINT_STRICT_VARS ) {
4639+
4640+ /* If none are, is an error, return false to stop useless further
4641+ * parsing. */
4642+ if (count == 0 ) {
4643+ return false;
4644+ }
4645+
4646+ /* When just one variable is declared, the construct has to match
4647+ * what the variable is. If it is an array, this must be a
4648+ * subscript which needs further processing; otherwise it is a
4649+ * character class needing nothing further. */
4650+ if (count == 1 ) {
4651+ return has_array ;
4652+ }
4653+
4654+ /* Here have both an array and a scalar with the same name. Drop
4655+ * down to use the heuristics to try to intuit which is meant */
4656+ }
4657+ }
46284658
46294659 /* Find matching ']'. khw: Actually it finds the next ']' and assumes it
46304660 * matches the '['. In order to account for the possibility of the ']'
@@ -5585,7 +5615,7 @@ yyl_dollar(pTHX_ char *s)
55855615 s = skipspace (s );
55865616
55875617 if ( (PL_expect != XREF || PL_oldoldbufptr == PL_last_lop )
5588- && intuit_more (s , PL_bufend )) {
5618+ && intuit_more (s , PL_bufend , FROM_DOLLAR )) {
55895619 if (* s == '[' ) {
55905620 PL_tokenbuf [0 ] = '@' ;
55915621 if (ckWARN (WARN_SYNTAX )) {
@@ -6288,7 +6318,7 @@ yyl_percent(pTHX_ char *s)
62886318 PREREF (PERLY_PERCENT_SIGN );
62896319 }
62906320 if ( (PL_expect != XREF || PL_oldoldbufptr == PL_last_lop )
6291- && intuit_more (s , PL_bufend )) {
6321+ && intuit_more (s , PL_bufend , FROM_PERCENT )) {
62926322 if (* s == '[' )
62936323 PL_tokenbuf [0 ] = '@' ;
62946324 }
@@ -6910,7 +6940,7 @@ yyl_snail(pTHX_ char *s)
69106940 if (PL_lex_state == LEX_NORMAL || PL_lex_brackets )
69116941 s = skipspace (s );
69126942 if ( (PL_expect != XREF || PL_oldoldbufptr == PL_last_lop )
6913- && intuit_more (s , PL_bufend ))
6943+ && intuit_more (s , PL_bufend , FROM_SNAIL ))
69146944 {
69156945 if (* s == '{' )
69166946 PL_tokenbuf [0 ] = '%' ;
@@ -9993,7 +10023,7 @@ Perl_yylex(pTHX)
999310023 return yylex ();
999410024
999510025 case LEX_INTERPENDMAYBE :
9996- if (intuit_more (PL_bufptr , PL_bufend )) {
10026+ if (intuit_more (PL_bufptr , PL_bufend , FROM_INTERDEPENDMAYBE )) {
999710027 PL_lex_state = LEX_INTERPNORMAL ; /* false alarm, more expr */
999810028 break ;
999910029 }
@@ -10807,7 +10837,7 @@ S_scan_ident(pTHX_ char *s, char *dest, STRLEN destlen, I32 ck_uni)
1080710837 }
1080810838 else if ( PL_lex_state == LEX_INTERPNORMAL
1080910839 && !PL_lex_brackets
10810- && !intuit_more (s , PL_bufend ))
10840+ && !intuit_more (s , PL_bufend , FROM_IDENT ))
1081110841 PL_lex_state = LEX_INTERPEND ;
1081210842 return s ;
1081310843}
0 commit comments