@@ -4758,13 +4758,70 @@ S_intuit_more(pTHX_ char *s, char *e,
47584758 return false;
47594759 }
47604760
4761- /* khw: This only looks at global variables; lexicals came
4762- * later, and this hasn't been updated. Ouch!! */
4763- if ( len > 1
4764- && gv_fetchpvn_flags (tmpbuf + 1 ,
4765- len ,
4766- UTF ? SVf_UTF8 : 0 ,
4767- SVt_PV ))
4761+ /* If there is extra stuff in the source, like braces, it means
4762+ * this is almost definitely intended to be an identifier */
4763+ bool decorated ;
4764+ decorated = (Size_t ) (s_after_ident - s ) > len ;
4765+
4766+ if (isDIGIT_A (tmpbuf [1 ])) {
4767+
4768+ /* &41 and &4b are illegal subroutine names, so this is an
4769+ * error or a charclass */
4770+ if (s [0 ] == '&' ) {
4771+ return false;
4772+ }
4773+
4774+ /* Here, matches [$@]\d+. If the next input character is a
4775+ * \w, we would have something like $456x, which is an illegal
4776+ * identifier, so this is an error or a charclass */
4777+ if ( ! decorated
4778+ && isWORDCHAR_lazy_if_safe (s_after_ident ,
4779+ PL_bufend , UTF ))
4780+ {
4781+ return false;
4782+ }
4783+
4784+ /* We don't get here if this potential identifier starts with
4785+ * leading zeros, due to the logic in scan_ident. */
4786+ assert (len == 1 || tmpbuf [0 ] != '0' );
4787+
4788+ /* The chances are vanishingly small that someone is going to
4789+ * want [$0] to expand to the program's name in a character
4790+ * class. But, what would the program's name be doing as part
4791+ * of a subscript either? The only likely scenario is that
4792+ * this is meant to be a charclass matching either '$' or '0'.
4793+ * */
4794+ if (tmpbuf [1 ] == '0' ) {
4795+ return false;
4796+ }
4797+
4798+ /* Here it is either something like $1 which is supposed to
4799+ * match either dollar or 1, or it is supposed to expand to
4800+ * what is in $1 left over from a capturing group from the
4801+ * previous pattern match. In the latter case, it could be
4802+ * either a part of wanting to calculate a subscript, or to
4803+ * use as part of the contents of the character class.
4804+ * Larger (undecorated) numbers are much less likely to have
4805+ * had capturing groups, so they lean more towards a
4806+ * charclass. 100 is what this function has traditionally
4807+ * used for len>1; khw thinks there is no bias one way or the
4808+ * other for length 1 ones. But has chosen 100 for decorated
4809+ * identifiers
4810+ *
4811+ * XXX long enough identifiers could probably return false
4812+ * immediately here, rather than using weights. */
4813+ if (decorated || len > 1 ) {
4814+ weight -= 100 ;
4815+ }
4816+ }
4817+ else if ( len > 1
4818+ /* khw: This only looks at global variables; lexicals
4819+ * came later, and this hasn't been updated. Ouch!!
4820+ * */
4821+ && gv_fetchpvn_flags (tmpbuf + 1 ,
4822+ len ,
4823+ UTF ? SVf_UTF8 : 0 ,
4824+ SVt_PV ))
47684825 {
47694826 weight -= 100 ;
47704827
@@ -4817,7 +4874,6 @@ S_intuit_more(pTHX_ char *s, char *e,
48174874 * \? must be subscript for things like \d, but not \a.
48184875 */
48194876
4820-
48214877 case '\\' :
48224878 if (s [1 ] == '\0' ) {
48234879 /* \ followed by NUL strongly indicates character class */
0 commit comments