Skip to content

Commit 325ba43

Browse files
committed
[Parse] NFC: Split off tryScanRegexLiteral
This is a `const` version of `tryLexRegexLiteral` that does not advance `CurPtr`, but attempts to scan ahead to the end of a regex literal.
1 parent 4f5bf99 commit 325ba43

File tree

2 files changed

+60
-37
lines changed

2 files changed

+60
-37
lines changed

include/swift/Parse/Lexer.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -644,6 +644,12 @@ class Lexer {
644644
void lexStringLiteral(unsigned CustomDelimiterLen = 0);
645645
void lexEscapedIdentifier();
646646

647+
/// Attempt to scan a regex literal, returning the end pointer, or `nullptr`
648+
/// if a regex literal cannot be scanned.
649+
const char *tryScanRegexLiteral(const char *TokStart, bool MustBeRegex,
650+
DiagnosticEngine *Diags,
651+
bool &CompletelyErroneous) const;
652+
647653
/// Attempt to lex a regex literal, returning true if lexing should continue,
648654
/// false if this is not a regex literal.
649655
bool tryLexRegexLiteral(const char *TokStart);

lib/Parse/Lexer.cpp

Lines changed: 54 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -1980,27 +1980,18 @@ const char *Lexer::findEndOfCurlyQuoteStringLiteral(const char *Body,
19801980
}
19811981
}
19821982

1983-
bool Lexer::tryLexRegexLiteral(const char *TokStart) {
1983+
const char *Lexer::tryScanRegexLiteral(const char *TokStart, bool MustBeRegex,
1984+
DiagnosticEngine *Diags,
1985+
bool &CompletelyErroneous) const {
19841986
// We need to have experimental string processing enabled, and have the
19851987
// parsing logic for regex literals available.
19861988
if (!LangOpts.EnableExperimentalStringProcessing || !regexLiteralLexingFn)
1987-
return false;
1989+
return nullptr;
19881990

1989-
bool MustBeRegex = true;
19901991
bool IsForwardSlash = (*TokStart == '/');
19911992

19921993
// Check if we're able to lex a `/.../` regex.
19931994
if (IsForwardSlash) {
1994-
switch (ForwardSlashRegexMode) {
1995-
case LexerForwardSlashRegexMode::None:
1996-
return false;
1997-
case LexerForwardSlashRegexMode::Tentative:
1998-
MustBeRegex = false;
1999-
break;
2000-
case LexerForwardSlashRegexMode::Always:
2001-
break;
2002-
}
2003-
20041995
// For `/.../` regex literals, we need to ban space and tab at the start of
20051996
// a regex to avoid ambiguity with operator chains, e.g:
20061997
//
@@ -2018,23 +2009,25 @@ bool Lexer::tryLexRegexLiteral(const char *TokStart) {
20182009
case ' ':
20192010
case '\t': {
20202011
if (!MustBeRegex)
2021-
return false;
2012+
return nullptr;
20222013

2023-
// We must have a regex, so emit an error for space and tab.
2024-
StringRef DiagChar;
2025-
switch (*RegexContentStart) {
2026-
case ' ':
2027-
DiagChar = "space";
2028-
break;
2029-
case '\t':
2030-
DiagChar = "tab";
2031-
break;
2032-
default:
2033-
llvm_unreachable("Unhandled case");
2014+
if (Diags) {
2015+
// We must have a regex, so emit an error for space and tab.
2016+
StringRef DiagChar;
2017+
switch (*RegexContentStart) {
2018+
case ' ':
2019+
DiagChar = "space";
2020+
break;
2021+
case '\t':
2022+
DiagChar = "tab";
2023+
break;
2024+
default:
2025+
llvm_unreachable("Unhandled case");
2026+
}
2027+
Diags->diagnose(getSourceLoc(RegexContentStart),
2028+
diag::lex_regex_literal_invalid_starting_char, DiagChar)
2029+
.fixItInsert(getSourceLoc(RegexContentStart), "\\");
20342030
}
2035-
diagnose(RegexContentStart, diag::lex_regex_literal_invalid_starting_char,
2036-
DiagChar)
2037-
.fixItInsert(getSourceLoc(RegexContentStart), "\\");
20382031
break;
20392032
}
20402033
default:
@@ -2047,25 +2040,26 @@ bool Lexer::tryLexRegexLiteral(const char *TokStart) {
20472040
// - CompletelyErroneous will be set if there was an error that cannot be
20482041
// recovered from.
20492042
auto *Ptr = TokStart;
2050-
bool CompletelyErroneous = regexLiteralLexingFn(
2051-
&Ptr, BufferEnd, MustBeRegex,
2052-
getBridgedOptionalDiagnosticEngine(getTokenDiags()));
2043+
CompletelyErroneous = regexLiteralLexingFn(
2044+
&Ptr, BufferEnd, MustBeRegex, getBridgedOptionalDiagnosticEngine(Diags));
20532045

20542046
// If we didn't make any lexing progress, this isn't a regex literal and we
20552047
// should fall back to lexing as something else.
20562048
if (Ptr == TokStart)
2057-
return false;
2049+
return nullptr;
20582050

20592051
// If we're lexing `/.../`, error if we ended on the opening of a comment.
20602052
// We prefer to lex the comment, as that is more likely than not what
20612053
// the user is expecting.
20622054
// TODO: This should be sunk into the Swift library.
20632055
if (IsForwardSlash && Ptr[-1] == '/' && (*Ptr == '*' || *Ptr == '/')) {
20642056
if (!MustBeRegex)
2065-
return false;
2066-
2067-
diagnose(TokStart, diag::lex_regex_literal_unterminated);
2057+
return nullptr;
20682058

2059+
if (Diags) {
2060+
Diags->diagnose(getSourceLoc(TokStart),
2061+
diag::lex_regex_literal_unterminated);
2062+
}
20692063
// Move the pointer back to the '/' of the comment.
20702064
Ptr--;
20712065
}
@@ -2098,7 +2092,7 @@ bool Lexer::tryLexRegexLiteral(const char *TokStart) {
20982092

20992093
// Invalid, so bail.
21002094
if (GroupDepth == 0)
2101-
return false;
2095+
return nullptr;
21022096

21032097
GroupDepth -= 1;
21042098
break;
@@ -2111,9 +2105,32 @@ bool Lexer::tryLexRegexLiteral(const char *TokStart) {
21112105
}
21122106
}
21132107
}
2108+
assert(Ptr > TokStart && Ptr <= BufferEnd);
2109+
return Ptr;
2110+
}
2111+
2112+
bool Lexer::tryLexRegexLiteral(const char *TokStart) {
2113+
bool IsForwardSlash = (*TokStart == '/');
2114+
bool MustBeRegex = true;
2115+
2116+
if (IsForwardSlash) {
2117+
switch (ForwardSlashRegexMode) {
2118+
case LexerForwardSlashRegexMode::None:
2119+
return false;
2120+
case LexerForwardSlashRegexMode::Tentative:
2121+
MustBeRegex = false;
2122+
break;
2123+
case LexerForwardSlashRegexMode::Always:
2124+
break;
2125+
}
2126+
}
2127+
bool CompletelyErroneous = false;
2128+
auto *Ptr = tryScanRegexLiteral(TokStart, MustBeRegex, getTokenDiags(),
2129+
CompletelyErroneous);
2130+
if (!Ptr)
2131+
return false;
21142132

21152133
// Update to point to where we ended regex lexing.
2116-
assert(Ptr > TokStart && Ptr <= BufferEnd);
21172134
CurPtr = Ptr;
21182135

21192136
// If the lexing was completely erroneous, form an unknown token.

0 commit comments

Comments
 (0)