@@ -1980,27 +1980,18 @@ const char *Lexer::findEndOfCurlyQuoteStringLiteral(const char *Body,
1980
1980
}
1981
1981
}
1982
1982
1983
- bool Lexer::tryLexRegexLiteral (const char *TokStart) {
1983
+ const char *Lexer::tryScanRegexLiteral (const char *TokStart, bool MustBeRegex,
1984
+ DiagnosticEngine *Diags,
1985
+ bool &CompletelyErroneous) const {
1984
1986
// We need to have experimental string processing enabled, and have the
1985
1987
// parsing logic for regex literals available.
1986
1988
if (!LangOpts.EnableExperimentalStringProcessing || !regexLiteralLexingFn)
1987
- return false ;
1989
+ return nullptr ;
1988
1990
1989
- bool MustBeRegex = true ;
1990
1991
bool IsForwardSlash = (*TokStart == ' /' );
1991
1992
1992
1993
// Check if we're able to lex a `/.../` regex.
1993
1994
if (IsForwardSlash) {
1994
- switch (ForwardSlashRegexMode) {
1995
- case LexerForwardSlashRegexMode::None:
1996
- return false ;
1997
- case LexerForwardSlashRegexMode::Tentative:
1998
- MustBeRegex = false ;
1999
- break ;
2000
- case LexerForwardSlashRegexMode::Always:
2001
- break ;
2002
- }
2003
-
2004
1995
// For `/.../` regex literals, we need to ban space and tab at the start of
2005
1996
// a regex to avoid ambiguity with operator chains, e.g:
2006
1997
//
@@ -2018,23 +2009,25 @@ bool Lexer::tryLexRegexLiteral(const char *TokStart) {
2018
2009
case ' ' :
2019
2010
case ' \t ' : {
2020
2011
if (!MustBeRegex)
2021
- return false ;
2012
+ return nullptr ;
2022
2013
2023
- // We must have a regex, so emit an error for space and tab.
2024
- StringRef DiagChar;
2025
- switch (*RegexContentStart) {
2026
- case ' ' :
2027
- DiagChar = " space" ;
2028
- break ;
2029
- case ' \t ' :
2030
- DiagChar = " tab" ;
2031
- break ;
2032
- default :
2033
- llvm_unreachable (" Unhandled case" );
2014
+ if (Diags) {
2015
+ // We must have a regex, so emit an error for space and tab.
2016
+ StringRef DiagChar;
2017
+ switch (*RegexContentStart) {
2018
+ case ' ' :
2019
+ DiagChar = " space" ;
2020
+ break ;
2021
+ case ' \t ' :
2022
+ DiagChar = " tab" ;
2023
+ break ;
2024
+ default :
2025
+ llvm_unreachable (" Unhandled case" );
2026
+ }
2027
+ Diags->diagnose (getSourceLoc (RegexContentStart),
2028
+ diag::lex_regex_literal_invalid_starting_char, DiagChar)
2029
+ .fixItInsert (getSourceLoc (RegexContentStart), " \\ " );
2034
2030
}
2035
- diagnose (RegexContentStart, diag::lex_regex_literal_invalid_starting_char,
2036
- DiagChar)
2037
- .fixItInsert (getSourceLoc (RegexContentStart), " \\ " );
2038
2031
break ;
2039
2032
}
2040
2033
default :
@@ -2047,25 +2040,26 @@ bool Lexer::tryLexRegexLiteral(const char *TokStart) {
2047
2040
// - CompletelyErroneous will be set if there was an error that cannot be
2048
2041
// recovered from.
2049
2042
auto *Ptr = TokStart;
2050
- bool CompletelyErroneous = regexLiteralLexingFn (
2051
- &Ptr, BufferEnd, MustBeRegex,
2052
- getBridgedOptionalDiagnosticEngine (getTokenDiags ()));
2043
+ CompletelyErroneous = regexLiteralLexingFn (
2044
+ &Ptr, BufferEnd, MustBeRegex, getBridgedOptionalDiagnosticEngine (Diags));
2053
2045
2054
2046
// If we didn't make any lexing progress, this isn't a regex literal and we
2055
2047
// should fallback to lexing as something else.
2056
2048
if (Ptr == TokStart)
2057
- return false ;
2049
+ return nullptr ;
2058
2050
2059
2051
// If we're lexing `/.../`, error if we ended on the opening of a comment.
2060
2052
// We prefer to lex the comment as it's more likely than not that is what
2061
2053
// the user is expecting.
2062
2054
// TODO: This should be sunk into the Swift library.
2063
2055
if (IsForwardSlash && Ptr[-1 ] == ' /' && (*Ptr == ' *' || *Ptr == ' /' )) {
2064
2056
if (!MustBeRegex)
2065
- return false ;
2066
-
2067
- diagnose (TokStart, diag::lex_regex_literal_unterminated);
2057
+ return nullptr ;
2068
2058
2059
+ if (Diags) {
2060
+ Diags->diagnose (getSourceLoc (TokStart),
2061
+ diag::lex_regex_literal_unterminated);
2062
+ }
2069
2063
// Move the pointer back to the '/' of the comment.
2070
2064
Ptr--;
2071
2065
}
@@ -2098,7 +2092,7 @@ bool Lexer::tryLexRegexLiteral(const char *TokStart) {
2098
2092
2099
2093
// Invalid, so bail.
2100
2094
if (GroupDepth == 0 )
2101
- return false ;
2095
+ return nullptr ;
2102
2096
2103
2097
GroupDepth -= 1 ;
2104
2098
break ;
@@ -2111,9 +2105,32 @@ bool Lexer::tryLexRegexLiteral(const char *TokStart) {
2111
2105
}
2112
2106
}
2113
2107
}
2108
+ assert (Ptr > TokStart && Ptr <= BufferEnd);
2109
+ return Ptr;
2110
+ }
2111
+
2112
+ bool Lexer::tryLexRegexLiteral (const char *TokStart) {
2113
+ bool IsForwardSlash = (*TokStart == ' /' );
2114
+ bool MustBeRegex = true ;
2115
+
2116
+ if (IsForwardSlash) {
2117
+ switch (ForwardSlashRegexMode) {
2118
+ case LexerForwardSlashRegexMode::None:
2119
+ return false ;
2120
+ case LexerForwardSlashRegexMode::Tentative:
2121
+ MustBeRegex = false ;
2122
+ break ;
2123
+ case LexerForwardSlashRegexMode::Always:
2124
+ break ;
2125
+ }
2126
+ }
2127
+ bool CompletelyErroneous = false ;
2128
+ auto *Ptr = tryScanRegexLiteral (TokStart, MustBeRegex, getTokenDiags (),
2129
+ CompletelyErroneous);
2130
+ if (!Ptr)
2131
+ return false ;
2114
2132
2115
2133
// Update to point to where we ended regex lexing.
2116
- assert (Ptr > TokStart && Ptr <= BufferEnd);
2117
2134
CurPtr = Ptr;
2118
2135
2119
2136
// If the lexing was completely erroneous, form an unknown token.
0 commit comments