@@ -246,9 +246,11 @@ Lexer::Lexer(const LangOptions &Options, const SourceManager &SourceMgr,
246
246
initialize (Offset, EndOffset);
247
247
}
248
248
249
- Lexer::Lexer (Lexer &Parent, State BeginState, State EndState)
249
+ Lexer::Lexer (const Lexer &Parent, State BeginState, State EndState,
250
+ bool EnableDiagnostics)
250
251
: Lexer(PrincipalTag(), Parent.LangOpts, Parent.SourceMgr, Parent.BufferID,
251
- Parent.getUnderlyingDiags(), Parent.LexMode,
252
+ EnableDiagnostics ? Parent.getUnderlyingDiags() : nullptr,
253
+ Parent.LexMode,
252
254
Parent.IsHashbangAllowed
253
255
? HashbangMode::Allowed
254
256
: HashbangMode::Disallowed,
@@ -1978,27 +1980,76 @@ const char *Lexer::findEndOfCurlyQuoteStringLiteral(const char *Body,
1978
1980
}
1979
1981
}
1980
1982
1981
- bool Lexer::tryLexRegexLiteral (const char *TokStart) {
1983
+ bool Lexer::isPotentialUnskippableBareSlashRegexLiteral (const Token &Tok) const {
1984
+ if (!LangOpts.hasFeature (Feature::BareSlashRegexLiterals))
1985
+ return false ;
1986
+
1987
+ // A `/.../` regex literal may only start on a binary or prefix operator.
1988
+ if (Tok.isNot (tok::oper_prefix, tok::oper_binary_spaced,
1989
+ tok::oper_binary_unspaced)) {
1990
+ return false ;
1991
+ }
1992
+ auto SlashIdx = Tok.getText ().find (" /" );
1993
+ if (SlashIdx == StringRef::npos)
1994
+ return false ;
1995
+
1996
+ auto Offset = getBufferPtrForSourceLoc (Tok.getLoc ()) + SlashIdx;
1997
+ bool CompletelyErroneous;
1998
+ if (tryScanRegexLiteral (Offset, /* MustBeRegex*/ false , /* Diags*/ nullptr ,
1999
+ CompletelyErroneous)) {
2000
+ // Definitely a regex literal.
2001
+ return true ;
2002
+ }
2003
+
2004
+ // A prefix '/' can never be a regex literal if it failed a heuristic.
2005
+ if (Tok.is (tok::oper_prefix))
2006
+ return false ;
2007
+
2008
+ // We either don't have a regex literal, or we failed a heuristic. We now need
2009
+ // to make sure we don't have an unbalanced `{` or `}`, as that would have the
2010
+ // potential to change the range of a skipped body if we try to more
2011
+ // aggressively lex a regex literal during normal parsing. If we have balanced
2012
+ // `{` + `}`, we can proceed with skipping. Worst case scenario is we emit a
2013
+ // worse diagnostic.
2014
+ // FIXME: We ought to silence lexer diagnostics when skipping, this would
2015
+ // avoid emitting a worse diagnostic.
2016
+ auto *EndPtr = tryScanRegexLiteral (Offset, /* MustBeRegex*/ true ,
2017
+ /* Diags*/ nullptr , CompletelyErroneous);
2018
+ if (!EndPtr)
2019
+ return false ;
2020
+
2021
+ Lexer L (*this , State (Tok.getLoc ().getAdvancedLoc (Tok.getLength ())),
2022
+ State (getSourceLoc (EndPtr)), /* EnableDiagnostics*/ false );
2023
+
2024
+ unsigned OpenBraces = 0 ;
2025
+ while (L.peekNextToken ().isNot (tok::eof)) {
2026
+ Token Tok;
2027
+ L.lex (Tok);
2028
+ if (Tok.is (tok::l_brace))
2029
+ OpenBraces += 1 ;
2030
+ if (Tok.is (tok::r_brace)) {
2031
+ if (OpenBraces == 0 )
2032
+ return true ;
2033
+ OpenBraces -= 1 ;
2034
+ }
2035
+ }
2036
+
2037
+ // If we have an unbalanced `{`, this is unskippable.
2038
+ return OpenBraces != 0 ;
2039
+ }
2040
+
2041
+ const char *Lexer::tryScanRegexLiteral (const char *TokStart, bool MustBeRegex,
2042
+ DiagnosticEngine *Diags,
2043
+ bool &CompletelyErroneous) const {
1982
2044
// We need to have experimental string processing enabled, and have the
1983
2045
// parsing logic for regex literals available.
1984
2046
if (!LangOpts.EnableExperimentalStringProcessing || !regexLiteralLexingFn)
1985
- return false ;
2047
+ return nullptr ;
1986
2048
1987
- bool MustBeRegex = true ;
1988
2049
bool IsForwardSlash = (*TokStart == ' /' );
1989
2050
1990
2051
// Check if we're able to lex a `/.../` regex.
1991
2052
if (IsForwardSlash) {
1992
- switch (ForwardSlashRegexMode) {
1993
- case LexerForwardSlashRegexMode::None:
1994
- return false ;
1995
- case LexerForwardSlashRegexMode::Tentative:
1996
- MustBeRegex = false ;
1997
- break ;
1998
- case LexerForwardSlashRegexMode::Always:
1999
- break ;
2000
- }
2001
-
2002
2053
// For `/.../` regex literals, we need to ban space and tab at the start of
2003
2054
// a regex to avoid ambiguity with operator chains, e.g:
2004
2055
//
@@ -2016,23 +2067,25 @@ bool Lexer::tryLexRegexLiteral(const char *TokStart) {
2016
2067
case ' ' :
2017
2068
case ' \t ' : {
2018
2069
if (!MustBeRegex)
2019
- return false ;
2070
+ return nullptr ;
2020
2071
2021
- // We must have a regex, so emit an error for space and tab.
2022
- StringRef DiagChar;
2023
- switch (*RegexContentStart) {
2024
- case ' ' :
2025
- DiagChar = " space" ;
2026
- break ;
2027
- case ' \t ' :
2028
- DiagChar = " tab" ;
2029
- break ;
2030
- default :
2031
- llvm_unreachable (" Unhandled case" );
2072
+ if (Diags) {
2073
+ // We must have a regex, so emit an error for space and tab.
2074
+ StringRef DiagChar;
2075
+ switch (*RegexContentStart) {
2076
+ case ' ' :
2077
+ DiagChar = " space" ;
2078
+ break ;
2079
+ case ' \t ' :
2080
+ DiagChar = " tab" ;
2081
+ break ;
2082
+ default :
2083
+ llvm_unreachable (" Unhandled case" );
2084
+ }
2085
+ Diags->diagnose (getSourceLoc (RegexContentStart),
2086
+ diag::lex_regex_literal_invalid_starting_char, DiagChar)
2087
+ .fixItInsert (getSourceLoc (RegexContentStart), " \\ " );
2032
2088
}
2033
- diagnose (RegexContentStart, diag::lex_regex_literal_invalid_starting_char,
2034
- DiagChar)
2035
- .fixItInsert (getSourceLoc (RegexContentStart), " \\ " );
2036
2089
break ;
2037
2090
}
2038
2091
default :
@@ -2045,25 +2098,26 @@ bool Lexer::tryLexRegexLiteral(const char *TokStart) {
2045
2098
// - CompletelyErroneous will be set if there was an error that cannot be
2046
2099
// recovered from.
2047
2100
auto *Ptr = TokStart;
2048
- bool CompletelyErroneous = regexLiteralLexingFn (
2049
- &Ptr, BufferEnd, MustBeRegex,
2050
- getBridgedOptionalDiagnosticEngine (getTokenDiags ()));
2101
+ CompletelyErroneous = regexLiteralLexingFn (
2102
+ &Ptr, BufferEnd, MustBeRegex, getBridgedOptionalDiagnosticEngine (Diags));
2051
2103
2052
2104
// If we didn't make any lexing progress, this isn't a regex literal and we
2053
2105
// should fallback to lexing as something else.
2054
2106
if (Ptr == TokStart)
2055
- return false ;
2107
+ return nullptr ;
2056
2108
2057
2109
// If we're lexing `/.../`, error if we ended on the opening of a comment.
2058
2110
// We prefer to lex the comment as it's more likely than not that is what
2059
2111
// the user is expecting.
2060
2112
// TODO: This should be sunk into the Swift library.
2061
2113
if (IsForwardSlash && Ptr[-1 ] == ' /' && (*Ptr == ' *' || *Ptr == ' /' )) {
2062
2114
if (!MustBeRegex)
2063
- return false ;
2064
-
2065
- diagnose (TokStart, diag::lex_regex_literal_unterminated);
2115
+ return nullptr ;
2066
2116
2117
+ if (Diags) {
2118
+ Diags->diagnose (getSourceLoc (TokStart),
2119
+ diag::lex_regex_literal_unterminated);
2120
+ }
2067
2121
// Move the pointer back to the '/' of the comment.
2068
2122
Ptr--;
2069
2123
}
@@ -2096,7 +2150,7 @@ bool Lexer::tryLexRegexLiteral(const char *TokStart) {
2096
2150
2097
2151
// Invalid, so bail.
2098
2152
if (GroupDepth == 0 )
2099
- return false ;
2153
+ return nullptr ;
2100
2154
2101
2155
GroupDepth -= 1 ;
2102
2156
break ;
@@ -2109,9 +2163,32 @@ bool Lexer::tryLexRegexLiteral(const char *TokStart) {
2109
2163
}
2110
2164
}
2111
2165
}
2166
+ assert (Ptr > TokStart && Ptr <= BufferEnd);
2167
+ return Ptr;
2168
+ }
2169
+
2170
+ bool Lexer::tryLexRegexLiteral (const char *TokStart) {
2171
+ bool IsForwardSlash = (*TokStart == ' /' );
2172
+ bool MustBeRegex = true ;
2173
+
2174
+ if (IsForwardSlash) {
2175
+ switch (ForwardSlashRegexMode) {
2176
+ case LexerForwardSlashRegexMode::None:
2177
+ return false ;
2178
+ case LexerForwardSlashRegexMode::Tentative:
2179
+ MustBeRegex = false ;
2180
+ break ;
2181
+ case LexerForwardSlashRegexMode::Always:
2182
+ break ;
2183
+ }
2184
+ }
2185
+ bool CompletelyErroneous = false ;
2186
+ auto *Ptr = tryScanRegexLiteral (TokStart, MustBeRegex, getTokenDiags (),
2187
+ CompletelyErroneous);
2188
+ if (!Ptr)
2189
+ return false ;
2112
2190
2113
2191
// Update to point to where we ended regex lexing.
2114
- assert (Ptr > TokStart && Ptr <= BufferEnd);
2115
2192
CurPtr = Ptr;
2116
2193
2117
2194
// If the lexing was completely erroneous, form an unknown token.
0 commit comments