@@ -246,9 +246,11 @@ Lexer::Lexer(const LangOptions &Options, const SourceManager &SourceMgr,
246
246
initialize (Offset, EndOffset);
247
247
}
248
248
249
- Lexer::Lexer (Lexer &Parent, State BeginState, State EndState)
249
+ Lexer::Lexer (const Lexer &Parent, State BeginState, State EndState,
250
+ bool EnableDiagnostics)
250
251
: Lexer(PrincipalTag(), Parent.LangOpts, Parent.SourceMgr, Parent.BufferID,
251
- Parent.getUnderlyingDiags(), Parent.LexMode,
252
+ EnableDiagnostics ? Parent.getUnderlyingDiags() : nullptr,
253
+ Parent.LexMode,
252
254
Parent.IsHashbangAllowed
253
255
? HashbangMode::Allowed
254
256
: HashbangMode::Disallowed,
@@ -1970,27 +1972,76 @@ const char *Lexer::findEndOfCurlyQuoteStringLiteral(const char *Body,
1970
1972
}
1971
1973
}
1972
1974
1973
- bool Lexer::tryLexRegexLiteral (const char *TokStart) {
1975
+ bool Lexer::isPotentialUnskippableBareSlashRegexLiteral (const Token &Tok) const { // Returns true if operator token `Tok` may begin a `/.../` regex literal that makes skipping unsafe.
1976
+ if (!LangOpts.EnableBareSlashRegexLiterals )
1977
+ return false ; // Bare-slash regex literals are disabled, so nothing to check.
1978
+
1979
+ // A `/.../` regex literal may only start on a binary or prefix operator.
1980
+ if (Tok.isNot (tok::oper_prefix, tok::oper_binary_spaced,
1981
+ tok::oper_binary_unspaced)) {
1982
+ return false ;
1983
+ }
1984
+ auto SlashIdx = Tok.getText ().find (" /" );
1985
+ if (SlashIdx == StringRef::npos)
1986
+ return false ; // The operator text has no slash to start a literal on.
1987
+
1988
+ auto Offset = getBufferPtrForSourceLoc (Tok.getLoc ()) + SlashIdx; // Buffer pointer to the slash found inside the operator token.
1989
+ bool CompletelyErroneous; // Out-parameter of tryScanRegexLiteral; its value is never read in this function.
1990
+ if (tryScanRegexLiteral (Offset, /* MustBeRegex*/ false , /* Diags*/ nullptr ,
1991
+ CompletelyErroneous)) {
1992
+ // Definitely a regex literal: the scan succeeded even with MustBeRegex off.
1993
+ return true ;
1994
+ }
1995
+
1996
+ // A prefix '/' can never be a regex literal if it failed a heuristic.
1997
+ if (Tok.is (tok::oper_prefix))
1998
+ return false ;
1999
+
2000
+ // We either don't have a regex literal, or we failed a heuristic. We now need
2001
+ // to make sure we don't have an unbalanced `{` or `}`, as that would have the
2002
+ // potential to change the range of a skipped body if we try to more
2003
+ // aggressively lex a regex literal during normal parsing. If we have balanced
2004
+ // `{` + `}`, we can proceed with skipping. Worst case scenario is we emit a
2005
+ // worse diagnostic.
2006
+ // FIXME: We ought to silence lexer diagnostics when skipping, this would
2007
+ // avoid emitting a worse diagnostic.
2008
+ auto *EndPtr = tryScanRegexLiteral (Offset, /* MustBeRegex*/ true ,
2009
+ /* Diags*/ nullptr , CompletelyErroneous);
2010
+ if (!EndPtr)
2011
+ return false ; // Nothing scans as a regex even when forced, so there is no range to inspect.
2012
+
2013
+ Lexer L (*this , State (Tok.getLoc ().getAdvancedLoc (Tok.getLength ())),
2014
+ State (getSourceLoc (EndPtr)), /* EnableDiagnostics*/ false ); // Sub-lexer from the end of `Tok` up to `EndPtr`, with diagnostics disabled.
2015
+
2016
+ unsigned OpenBraces = 0 ; // Count of `{` tokens seen that have not yet been closed.
2017
+ while (L.peekNextToken ().isNot (tok::eof)) {
2018
+ Token Tok; // Note: shadows the parameter `Tok` for the rest of the loop body.
2019
+ L.lex (Tok);
2020
+ if (Tok.is (tok::l_brace))
2021
+ OpenBraces += 1 ;
2022
+ if (Tok.is (tok::r_brace)) {
2023
+ if (OpenBraces == 0 )
2024
+ return true ; // A `}` with no matching `{` in the range: unbalanced.
2025
+ OpenBraces -= 1 ;
2026
+ }
2027
+ }
2028
+
2029
+ // If we have an unbalanced `{`, this is unskippable.
2030
+ return OpenBraces != 0 ;
2031
+ }
2032
+
2033
+ const char *Lexer::tryScanRegexLiteral (const char *TokStart, bool MustBeRegex,
2034
+ DiagnosticEngine *Diags,
2035
+ bool &CompletelyErroneous) const {
1974
2036
// We need to have experimental string processing enabled, and have the
1975
2037
// parsing logic for regex literals available.
1976
2038
if (!LangOpts.EnableExperimentalStringProcessing || !regexLiteralLexingFn)
1977
- return false ;
2039
+ return nullptr ;
1978
2040
1979
- bool MustBeRegex = true ;
1980
2041
bool IsForwardSlash = (*TokStart == ' /' );
1981
2042
1982
2043
// Check if we're able to lex a `/.../` regex.
1983
2044
if (IsForwardSlash) {
1984
- switch (ForwardSlashRegexMode) {
1985
- case LexerForwardSlashRegexMode::None:
1986
- return false ;
1987
- case LexerForwardSlashRegexMode::Tentative:
1988
- MustBeRegex = false ;
1989
- break ;
1990
- case LexerForwardSlashRegexMode::Always:
1991
- break ;
1992
- }
1993
-
1994
2045
// For `/.../` regex literals, we need to ban space and tab at the start of
1995
2046
// a regex to avoid ambiguity with operator chains, e.g:
1996
2047
//
@@ -2008,23 +2059,25 @@ bool Lexer::tryLexRegexLiteral(const char *TokStart) {
2008
2059
case ' ' :
2009
2060
case ' \t ' : {
2010
2061
if (!MustBeRegex)
2011
- return false ;
2062
+ return nullptr ;
2012
2063
2013
- // We must have a regex, so emit an error for space and tab.
2014
- StringRef DiagChar;
2015
- switch (*RegexContentStart) {
2016
- case ' ' :
2017
- DiagChar = " space" ;
2018
- break ;
2019
- case ' \t ' :
2020
- DiagChar = " tab" ;
2021
- break ;
2022
- default :
2023
- llvm_unreachable (" Unhandled case" );
2064
+ if (Diags) {
2065
+ // We must have a regex, so emit an error for space and tab.
2066
+ StringRef DiagChar;
2067
+ switch (*RegexContentStart) {
2068
+ case ' ' :
2069
+ DiagChar = " space" ;
2070
+ break ;
2071
+ case ' \t ' :
2072
+ DiagChar = " tab" ;
2073
+ break ;
2074
+ default :
2075
+ llvm_unreachable (" Unhandled case" );
2076
+ }
2077
+ Diags->diagnose (getSourceLoc (RegexContentStart),
2078
+ diag::lex_regex_literal_invalid_starting_char, DiagChar)
2079
+ .fixItInsert (getSourceLoc (RegexContentStart), " \\ " );
2024
2080
}
2025
- diagnose (RegexContentStart, diag::lex_regex_literal_invalid_starting_char,
2026
- DiagChar)
2027
- .fixItInsert (getSourceLoc (RegexContentStart), " \\ " );
2028
2081
break ;
2029
2082
}
2030
2083
default :
@@ -2037,25 +2090,26 @@ bool Lexer::tryLexRegexLiteral(const char *TokStart) {
2037
2090
// - CompletelyErroneous will be set if there was an error that cannot be
2038
2091
// recovered from.
2039
2092
auto *Ptr = TokStart;
2040
- bool CompletelyErroneous = regexLiteralLexingFn (
2041
- &Ptr, BufferEnd, MustBeRegex,
2042
- getBridgedOptionalDiagnosticEngine (getTokenDiags ()));
2093
+ CompletelyErroneous = regexLiteralLexingFn (
2094
+ &Ptr, BufferEnd, MustBeRegex, getBridgedOptionalDiagnosticEngine (Diags));
2043
2095
2044
2096
// If we didn't make any lexing progress, this isn't a regex literal and we
2045
2097
// should fall back to lexing as something else.
2046
2098
if (Ptr == TokStart)
2047
- return false ;
2099
+ return nullptr ;
2048
2100
2049
2101
// If we're lexing `/.../`, error if we ended on the opening of a comment.
2050
2102
// We prefer to lex the comment as it's more likely than not that is what
2051
2103
// the user is expecting.
2052
2104
// TODO: This should be sunk into the Swift library.
2053
2105
if (IsForwardSlash && Ptr[-1 ] == ' /' && (*Ptr == ' *' || *Ptr == ' /' )) {
2054
2106
if (!MustBeRegex)
2055
- return false ;
2056
-
2057
- diagnose (TokStart, diag::lex_regex_literal_unterminated);
2107
+ return nullptr ;
2058
2108
2109
+ if (Diags) {
2110
+ Diags->diagnose (getSourceLoc (TokStart),
2111
+ diag::lex_regex_literal_unterminated);
2112
+ }
2059
2113
// Move the pointer back to the '/' of the comment.
2060
2114
Ptr--;
2061
2115
}
@@ -2088,7 +2142,7 @@ bool Lexer::tryLexRegexLiteral(const char *TokStart) {
2088
2142
2089
2143
// Invalid, so bail.
2090
2144
if (GroupDepth == 0 )
2091
- return false ;
2145
+ return nullptr ;
2092
2146
2093
2147
GroupDepth -= 1 ;
2094
2148
break ;
@@ -2101,9 +2155,32 @@ bool Lexer::tryLexRegexLiteral(const char *TokStart) {
2101
2155
}
2102
2156
}
2103
2157
}
2158
+ assert (Ptr > TokStart && Ptr <= BufferEnd);
2159
+ return Ptr;
2160
+ }
2161
+
2162
+ bool Lexer::tryLexRegexLiteral (const char *TokStart) {
2163
+ bool IsForwardSlash = (*TokStart == ' /' );
2164
+ bool MustBeRegex = true ;
2165
+
2166
+ if (IsForwardSlash) {
2167
+ switch (ForwardSlashRegexMode) {
2168
+ case LexerForwardSlashRegexMode::None:
2169
+ return false ;
2170
+ case LexerForwardSlashRegexMode::Tentative:
2171
+ MustBeRegex = false ;
2172
+ break ;
2173
+ case LexerForwardSlashRegexMode::Always:
2174
+ break ;
2175
+ }
2176
+ }
2177
+ bool CompletelyErroneous = false ;
2178
+ auto *Ptr = tryScanRegexLiteral (TokStart, MustBeRegex, getTokenDiags (),
2179
+ CompletelyErroneous);
2180
+ if (!Ptr)
2181
+ return false ;
2104
2182
2105
2183
// Update to point to where we ended regex lexing.
2106
- assert (Ptr > TokStart && Ptr <= BufferEnd);
2107
2184
CurPtr = Ptr;
2108
2185
2109
2186
// If the lexing was completely erroneous, form an unknown token.
0 commit comments