@@ -1972,27 +1972,18 @@ const char *Lexer::findEndOfCurlyQuoteStringLiteral(const char *Body,
1972
1972
}
1973
1973
}
1974
1974
1975
- bool Lexer::tryLexRegexLiteral (const char *TokStart) {
1975
+ const char *Lexer::tryScanRegexLiteral (const char *TokStart, bool MustBeRegex,
1976
+ DiagnosticEngine *Diags,
1977
+ bool &CompletelyErroneous) const {
1976
1978
// We need to have experimental string processing enabled, and have the
1977
1979
// parsing logic for regex literals available.
1978
1980
if (!LangOpts.EnableExperimentalStringProcessing || !regexLiteralLexingFn)
1979
- return false ;
1981
+ return nullptr ;
1980
1982
1981
- bool MustBeRegex = true ;
1982
1983
bool IsForwardSlash = (*TokStart == ' /' );
1983
1984
1984
1985
// Check if we're able to lex a `/.../` regex.
1985
1986
if (IsForwardSlash) {
1986
- switch (ForwardSlashRegexMode) {
1987
- case LexerForwardSlashRegexMode::None:
1988
- return false ;
1989
- case LexerForwardSlashRegexMode::Tentative:
1990
- MustBeRegex = false ;
1991
- break ;
1992
- case LexerForwardSlashRegexMode::Always:
1993
- break ;
1994
- }
1995
-
1996
1987
// For `/.../` regex literals, we need to ban space and tab at the start of
1997
1988
// a regex to avoid ambiguity with operator chains, e.g:
1998
1989
//
@@ -2010,23 +2001,25 @@ bool Lexer::tryLexRegexLiteral(const char *TokStart) {
2010
2001
case ' ' :
2011
2002
case ' \t ' : {
2012
2003
if (!MustBeRegex)
2013
- return false ;
2004
+ return nullptr ;
2014
2005
2015
- // We must have a regex, so emit an error for space and tab.
2016
- StringRef DiagChar;
2017
- switch (*RegexContentStart) {
2018
- case ' ' :
2019
- DiagChar = " space" ;
2020
- break ;
2021
- case ' \t ' :
2022
- DiagChar = " tab" ;
2023
- break ;
2024
- default :
2025
- llvm_unreachable (" Unhandled case" );
2006
+ if (Diags) {
2007
+ // We must have a regex, so emit an error for space and tab.
2008
+ StringRef DiagChar;
2009
+ switch (*RegexContentStart) {
2010
+ case ' ' :
2011
+ DiagChar = " space" ;
2012
+ break ;
2013
+ case ' \t ' :
2014
+ DiagChar = " tab" ;
2015
+ break ;
2016
+ default :
2017
+ llvm_unreachable (" Unhandled case" );
2018
+ }
2019
+ Diags->diagnose (getSourceLoc (RegexContentStart),
2020
+ diag::lex_regex_literal_invalid_starting_char, DiagChar)
2021
+ .fixItInsert (getSourceLoc (RegexContentStart), " \\ " );
2026
2022
}
2027
- diagnose (RegexContentStart, diag::lex_regex_literal_invalid_starting_char,
2028
- DiagChar)
2029
- .fixItInsert (getSourceLoc (RegexContentStart), " \\ " );
2030
2023
break ;
2031
2024
}
2032
2025
default :
@@ -2039,25 +2032,26 @@ bool Lexer::tryLexRegexLiteral(const char *TokStart) {
2039
2032
// - CompletelyErroneous will be set if there was an error that cannot be
2040
2033
// recovered from.
2041
2034
auto *Ptr = TokStart;
2042
- bool CompletelyErroneous = regexLiteralLexingFn (
2043
- &Ptr, BufferEnd, MustBeRegex,
2044
- getBridgedOptionalDiagnosticEngine (getTokenDiags ()));
2035
+ CompletelyErroneous = regexLiteralLexingFn (
2036
+ &Ptr, BufferEnd, MustBeRegex, getBridgedOptionalDiagnosticEngine (Diags));
2045
2037
2046
2038
// If we didn't make any lexing progress, this isn't a regex literal and we
2047
2039
// should fallback to lexing as something else.
2048
2040
if (Ptr == TokStart)
2049
- return false ;
2041
+ return nullptr ;
2050
2042
2051
2043
// If we're lexing `/.../`, error if we ended on the opening of a comment.
2052
2044
// We prefer to lex the comment as it's more likely than not that is what
2053
2045
// the user is expecting.
2054
2046
// TODO: This should be sunk into the Swift library.
2055
2047
if (IsForwardSlash && Ptr[-1 ] == ' /' && (*Ptr == ' *' || *Ptr == ' /' )) {
2056
2048
if (!MustBeRegex)
2057
- return false ;
2058
-
2059
- diagnose (TokStart, diag::lex_regex_literal_unterminated);
2049
+ return nullptr ;
2060
2050
2051
+ if (Diags) {
2052
+ Diags->diagnose (getSourceLoc (TokStart),
2053
+ diag::lex_regex_literal_unterminated);
2054
+ }
2061
2055
// Move the pointer back to the '/' of the comment.
2062
2056
Ptr--;
2063
2057
}
@@ -2090,7 +2084,7 @@ bool Lexer::tryLexRegexLiteral(const char *TokStart) {
2090
2084
2091
2085
// Invalid, so bail.
2092
2086
if (GroupDepth == 0 )
2093
- return false ;
2087
+ return nullptr ;
2094
2088
2095
2089
GroupDepth -= 1 ;
2096
2090
break ;
@@ -2103,9 +2097,32 @@ bool Lexer::tryLexRegexLiteral(const char *TokStart) {
2103
2097
}
2104
2098
}
2105
2099
}
2100
+ assert (Ptr > TokStart && Ptr <= BufferEnd);
2101
+ return Ptr;
2102
+ }
2103
+
2104
+ bool Lexer::tryLexRegexLiteral (const char *TokStart) {
2105
+ bool IsForwardSlash = (*TokStart == ' /' );
2106
+ bool MustBeRegex = true ;
2107
+
2108
+ if (IsForwardSlash) {
2109
+ switch (ForwardSlashRegexMode) {
2110
+ case LexerForwardSlashRegexMode::None:
2111
+ return false ;
2112
+ case LexerForwardSlashRegexMode::Tentative:
2113
+ MustBeRegex = false ;
2114
+ break ;
2115
+ case LexerForwardSlashRegexMode::Always:
2116
+ break ;
2117
+ }
2118
+ }
2119
+ bool CompletelyErroneous = false ;
2120
+ auto *Ptr = tryScanRegexLiteral (TokStart, MustBeRegex, getTokenDiags (),
2121
+ CompletelyErroneous);
2122
+ if (!Ptr)
2123
+ return false ;
2106
2124
2107
2125
// Update to point to where we ended regex lexing.
2108
- assert (Ptr > TokStart && Ptr <= BufferEnd);
2109
2126
CurPtr = Ptr;
2110
2127
2111
2128
// If the lexing was completely erroneous, form an unknown token.
0 commit comments