Skip to content

Commit 8a25031

Browse files
committed
[Parse] NFC: Split off tryScanRegexLiteral
This is a `const` version of `tryLexRegexLiteral` that does not advance `CurPtr`, but attempts to scan ahead to the end of a regex literal.
1 parent 7a70207 commit 8a25031

File tree

2 files changed

+60
-37
lines changed

2 files changed

+60
-37
lines changed

include/swift/Parse/Lexer.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -644,6 +644,12 @@ class Lexer {
644644
void lexStringLiteral(unsigned CustomDelimiterLen = 0);
645645
void lexEscapedIdentifier();
646646

647+
/// Attempt to scan a regex literal, returning the end pointer, or `nullptr`
648+
/// if a regex literal cannot be scanned.
649+
const char *tryScanRegexLiteral(const char *TokStart, bool MustBeRegex,
650+
DiagnosticEngine *Diags,
651+
bool &CompletelyErroneous) const;
652+
647653
/// Attempt to lex a regex literal, returning true if lexing should continue,
648654
/// false if this is not a regex literal.
649655
bool tryLexRegexLiteral(const char *TokStart);

lib/Parse/Lexer.cpp

Lines changed: 54 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -1972,27 +1972,18 @@ const char *Lexer::findEndOfCurlyQuoteStringLiteral(const char *Body,
19721972
}
19731973
}
19741974

1975-
bool Lexer::tryLexRegexLiteral(const char *TokStart) {
1975+
const char *Lexer::tryScanRegexLiteral(const char *TokStart, bool MustBeRegex,
1976+
DiagnosticEngine *Diags,
1977+
bool &CompletelyErroneous) const {
19761978
// We need to have experimental string processing enabled, and have the
19771979
// parsing logic for regex literals available.
19781980
if (!LangOpts.EnableExperimentalStringProcessing || !regexLiteralLexingFn)
1979-
return false;
1981+
return nullptr;
19801982

1981-
bool MustBeRegex = true;
19821983
bool IsForwardSlash = (*TokStart == '/');
19831984

19841985
// Check if we're able to lex a `/.../` regex.
19851986
if (IsForwardSlash) {
1986-
switch (ForwardSlashRegexMode) {
1987-
case LexerForwardSlashRegexMode::None:
1988-
return false;
1989-
case LexerForwardSlashRegexMode::Tentative:
1990-
MustBeRegex = false;
1991-
break;
1992-
case LexerForwardSlashRegexMode::Always:
1993-
break;
1994-
}
1995-
19961987
// For `/.../` regex literals, we need to ban space and tab at the start of
19971988
// a regex to avoid ambiguity with operator chains, e.g:
19981989
//
@@ -2010,23 +2001,25 @@ bool Lexer::tryLexRegexLiteral(const char *TokStart) {
20102001
case ' ':
20112002
case '\t': {
20122003
if (!MustBeRegex)
2013-
return false;
2004+
return nullptr;
20142005

2015-
// We must have a regex, so emit an error for space and tab.
2016-
StringRef DiagChar;
2017-
switch (*RegexContentStart) {
2018-
case ' ':
2019-
DiagChar = "space";
2020-
break;
2021-
case '\t':
2022-
DiagChar = "tab";
2023-
break;
2024-
default:
2025-
llvm_unreachable("Unhandled case");
2006+
if (Diags) {
2007+
// We must have a regex, so emit an error for space and tab.
2008+
StringRef DiagChar;
2009+
switch (*RegexContentStart) {
2010+
case ' ':
2011+
DiagChar = "space";
2012+
break;
2013+
case '\t':
2014+
DiagChar = "tab";
2015+
break;
2016+
default:
2017+
llvm_unreachable("Unhandled case");
2018+
}
2019+
Diags->diagnose(getSourceLoc(RegexContentStart),
2020+
diag::lex_regex_literal_invalid_starting_char, DiagChar)
2021+
.fixItInsert(getSourceLoc(RegexContentStart), "\\");
20262022
}
2027-
diagnose(RegexContentStart, diag::lex_regex_literal_invalid_starting_char,
2028-
DiagChar)
2029-
.fixItInsert(getSourceLoc(RegexContentStart), "\\");
20302023
break;
20312024
}
20322025
default:
@@ -2039,25 +2032,26 @@ bool Lexer::tryLexRegexLiteral(const char *TokStart) {
20392032
// - CompletelyErroneous will be set if there was an error that cannot be
20402033
// recovered from.
20412034
auto *Ptr = TokStart;
2042-
bool CompletelyErroneous = regexLiteralLexingFn(
2043-
&Ptr, BufferEnd, MustBeRegex,
2044-
getBridgedOptionalDiagnosticEngine(getTokenDiags()));
2035+
CompletelyErroneous = regexLiteralLexingFn(
2036+
&Ptr, BufferEnd, MustBeRegex, getBridgedOptionalDiagnosticEngine(Diags));
20452037

20462038
// If we didn't make any lexing progress, this isn't a regex literal and we
20472039
// should fall back to lexing as something else.
20482040
if (Ptr == TokStart)
2049-
return false;
2041+
return nullptr;
20502042

20512043
// If we're lexing `/.../`, error if we ended on the opening of a comment.
20522044
// We prefer to lex the comment, as that is more likely than not what
20532045
// the user is expecting.
20542046
// TODO: This should be sunk into the Swift library.
20552047
if (IsForwardSlash && Ptr[-1] == '/' && (*Ptr == '*' || *Ptr == '/')) {
20562048
if (!MustBeRegex)
2057-
return false;
2058-
2059-
diagnose(TokStart, diag::lex_regex_literal_unterminated);
2049+
return nullptr;
20602050

2051+
if (Diags) {
2052+
Diags->diagnose(getSourceLoc(TokStart),
2053+
diag::lex_regex_literal_unterminated);
2054+
}
20612055
// Move the pointer back to the '/' of the comment.
20622056
Ptr--;
20632057
}
@@ -2090,7 +2084,7 @@ bool Lexer::tryLexRegexLiteral(const char *TokStart) {
20902084

20912085
// Invalid, so bail.
20922086
if (GroupDepth == 0)
2093-
return false;
2087+
return nullptr;
20942088

20952089
GroupDepth -= 1;
20962090
break;
@@ -2103,9 +2097,32 @@ bool Lexer::tryLexRegexLiteral(const char *TokStart) {
21032097
}
21042098
}
21052099
}
2100+
assert(Ptr > TokStart && Ptr <= BufferEnd);
2101+
return Ptr;
2102+
}
2103+
2104+
bool Lexer::tryLexRegexLiteral(const char *TokStart) {
2105+
bool IsForwardSlash = (*TokStart == '/');
2106+
bool MustBeRegex = true;
2107+
2108+
if (IsForwardSlash) {
2109+
switch (ForwardSlashRegexMode) {
2110+
case LexerForwardSlashRegexMode::None:
2111+
return false;
2112+
case LexerForwardSlashRegexMode::Tentative:
2113+
MustBeRegex = false;
2114+
break;
2115+
case LexerForwardSlashRegexMode::Always:
2116+
break;
2117+
}
2118+
}
2119+
bool CompletelyErroneous = false;
2120+
auto *Ptr = tryScanRegexLiteral(TokStart, MustBeRegex, getTokenDiags(),
2121+
CompletelyErroneous);
2122+
if (!Ptr)
2123+
return false;
21062124

21072125
// Update to point to where we ended regex lexing.
2108-
assert(Ptr > TokStart && Ptr <= BufferEnd);
21092126
CurPtr = Ptr;
21102127

21112128
// If the lexing was completely erroneous, form an unknown token.

0 commit comments

Comments
 (0)