Skip to content

Commit e610a69

Browse files
authored
Merge pull request #59641 from hamishknight/no-skip-slash
2 parents 1c6055a + 515945f commit e610a69

File tree

9 files changed

+472
-93
lines changed

9 files changed

+472
-93
lines changed

include/swift/AST/DiagnosticEngine.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1390,7 +1390,7 @@ namespace swift {
13901390
DiagnosticEngine &getDiags() { return QueueEngine; }
13911391

13921392
/// Retrieve the underlying engine which will receive the diagnostics.
1393-
DiagnosticEngine &getUnderlyingDiags() { return UnderlyingEngine; }
1393+
DiagnosticEngine &getUnderlyingDiags() const { return UnderlyingEngine; }
13941394

13951395
/// Clear this queue and erase all diagnostics recorded.
13961396
void clear() {

include/swift/Parse/Lexer.h

Lines changed: 18 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -180,7 +180,7 @@ class Lexer {
180180
/// Retrieve the underlying diagnostic engine we emit diagnostics to. Note
181181
/// this should only be used for diagnostics not concerned with the current
182182
/// token.
183-
DiagnosticEngine *getUnderlyingDiags() {
183+
DiagnosticEngine *getUnderlyingDiags() const {
184184
return DiagQueue ? &DiagQueue->getUnderlyingDiags() : nullptr;
185185
}
186186

@@ -218,7 +218,10 @@ class Lexer {
218218
/// \param Parent the parent lexer that scans the whole buffer
219219
/// \param BeginState start of the subrange
220220
/// \param EndState end of the subrange
221-
Lexer(Lexer &Parent, State BeginState, State EndState);
221+
/// \param EnableDiagnostics Whether to inherit the diagnostic engine of
222+
/// \p Parent. If \c false, diagnostics will be disabled.
223+
Lexer(const Lexer &Parent, State BeginState, State EndState,
224+
bool EnableDiagnostics = true);
222225

223226
/// Returns true if this lexer will produce a code completion token.
224227
bool isCodeCompletion() const {
@@ -577,6 +580,13 @@ class Lexer {
577580
: LexerForwardSlashRegexMode::Tentative) {}
578581
};
579582

583+
/// Checks whether a given token could potentially contain the start of an
584+
/// unskippable `/.../` regex literal. Such tokens need to go through the
585+
/// parser, as they may become regex literal tokens. This includes operator
586+
/// tokens such as `!/` which could be split into prefix `!` on a regex
587+
/// literal.
588+
bool isPotentialUnskippableBareSlashRegexLiteral(const Token &Tok) const;
589+
580590
private:
581591
/// Nul character meaning kind.
582592
enum class NulCharacterKind {
@@ -641,6 +651,12 @@ class Lexer {
641651
void lexStringLiteral(unsigned CustomDelimiterLen = 0);
642652
void lexEscapedIdentifier();
643653

654+
/// Attempt to scan a regex literal, returning the end pointer, or `nullptr`
655+
/// if a regex literal cannot be scanned.
656+
const char *tryScanRegexLiteral(const char *TokStart, bool MustBeRegex,
657+
DiagnosticEngine *Diags,
658+
bool &CompletelyErroneous) const;
659+
644660
/// Attempt to lex a regex literal, returning true if lexing should continue,
645661
/// false if this is not a regex literal.
646662
bool tryLexRegexLiteral(const char *TokStart);

include/swift/Parse/Parser.h

Lines changed: 3 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -718,13 +718,6 @@ class Parser {
718718
/// plain Tok.is(T1) check).
719719
bool skipUntilTokenOrEndOfLine(tok T1, tok T2 = tok::NUM_TOKENS);
720720

721-
/// Skip a braced block (e.g. function body). The current token must be '{'.
722-
/// Returns \c true if the parser hit the eof before finding matched '}'.
723-
///
724-
/// Set \c HasNestedTypeDeclarations to true if a token for a type
725-
/// declaration is detected in the skipped block.
726-
bool skipBracedBlock(bool &HasNestedTypeDeclarations);
727-
728721
/// Skip over SIL decls until we encounter the start of a Swift decl or eof.
729722
void skipSILUntilSwiftDecl();
730723

@@ -1001,6 +994,8 @@ class Parser {
1001994
bool canDelayMemberDeclParsing(bool &HasOperatorDeclarations,
1002995
bool &HasNestedClassDeclarations);
1003996

997+
bool canDelayFunctionBodyParsing(bool &HasNestedTypeDeclarations);
998+
1004999
bool delayParsingDeclList(SourceLoc LBLoc, SourceLoc &RBLoc,
10051000
IterableDeclContext *IDC);
10061001

@@ -1211,9 +1206,7 @@ class Parser {
12111206
bool &hasEffectfulGet,
12121207
AccessorKind currentKind,
12131208
SourceLoc const& currentLoc);
1214-
1215-
void consumeAbstractFunctionBody(AbstractFunctionDecl *AFD,
1216-
const DeclAttributes &Attrs);
1209+
12171210
ParserResult<FuncDecl> parseDeclFunc(SourceLoc StaticLoc,
12181211
StaticSpellingKind StaticSpelling,
12191212
ParseDeclOptions Flags,

lib/Parse/Lexer.cpp

Lines changed: 116 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -246,9 +246,11 @@ Lexer::Lexer(const LangOptions &Options, const SourceManager &SourceMgr,
246246
initialize(Offset, EndOffset);
247247
}
248248

249-
Lexer::Lexer(Lexer &Parent, State BeginState, State EndState)
249+
Lexer::Lexer(const Lexer &Parent, State BeginState, State EndState,
250+
bool EnableDiagnostics)
250251
: Lexer(PrincipalTag(), Parent.LangOpts, Parent.SourceMgr, Parent.BufferID,
251-
Parent.getUnderlyingDiags(), Parent.LexMode,
252+
EnableDiagnostics ? Parent.getUnderlyingDiags() : nullptr,
253+
Parent.LexMode,
252254
Parent.IsHashbangAllowed
253255
? HashbangMode::Allowed
254256
: HashbangMode::Disallowed,
@@ -1978,27 +1980,76 @@ const char *Lexer::findEndOfCurlyQuoteStringLiteral(const char *Body,
19781980
}
19791981
}
19801982

1981-
bool Lexer::tryLexRegexLiteral(const char *TokStart) {
1983+
bool Lexer::isPotentialUnskippableBareSlashRegexLiteral(const Token &Tok) const {
1984+
if (!LangOpts.hasFeature(Feature::BareSlashRegexLiterals))
1985+
return false;
1986+
1987+
// A `/.../` regex literal may only start on a binary or prefix operator.
1988+
if (Tok.isNot(tok::oper_prefix, tok::oper_binary_spaced,
1989+
tok::oper_binary_unspaced)) {
1990+
return false;
1991+
}
1992+
auto SlashIdx = Tok.getText().find("/");
1993+
if (SlashIdx == StringRef::npos)
1994+
return false;
1995+
1996+
auto Offset = getBufferPtrForSourceLoc(Tok.getLoc()) + SlashIdx;
1997+
bool CompletelyErroneous;
1998+
if (tryScanRegexLiteral(Offset, /*MustBeRegex*/ false, /*Diags*/ nullptr,
1999+
CompletelyErroneous)) {
2000+
// Definitely a regex literal.
2001+
return true;
2002+
}
2003+
2004+
// A prefix '/' can never be a regex literal if it failed a heuristic.
2005+
if (Tok.is(tok::oper_prefix))
2006+
return false;
2007+
2008+
// We either don't have a regex literal, or we failed a heuristic. We now need
2009+
// to make sure we don't have an unbalanced `{` or `}`, as that would have the
2010+
// potential to change the range of a skipped body if we try to more
2011+
// aggressively lex a regex literal during normal parsing. If we have balanced
2012+
// `{` + `}`, we can proceed with skipping. Worst case scenario is we emit a
2013+
// worse diagnostic.
2014+
// FIXME: We ought to silence lexer diagnostics when skipping, this would
2015+
// avoid emitting a worse diagnostic.
2016+
auto *EndPtr = tryScanRegexLiteral(Offset, /*MustBeRegex*/ true,
2017+
/*Diags*/ nullptr, CompletelyErroneous);
2018+
if (!EndPtr)
2019+
return false;
2020+
2021+
Lexer L(*this, State(Tok.getLoc().getAdvancedLoc(Tok.getLength())),
2022+
State(getSourceLoc(EndPtr)), /*EnableDiagnostics*/ false);
2023+
2024+
unsigned OpenBraces = 0;
2025+
while (L.peekNextToken().isNot(tok::eof)) {
2026+
Token Tok;
2027+
L.lex(Tok);
2028+
if (Tok.is(tok::l_brace))
2029+
OpenBraces += 1;
2030+
if (Tok.is(tok::r_brace)) {
2031+
if (OpenBraces == 0)
2032+
return true;
2033+
OpenBraces -= 1;
2034+
}
2035+
}
2036+
2037+
// If we have an unbalanced `{`, this is unskippable.
2038+
return OpenBraces != 0;
2039+
}
2040+
2041+
const char *Lexer::tryScanRegexLiteral(const char *TokStart, bool MustBeRegex,
2042+
DiagnosticEngine *Diags,
2043+
bool &CompletelyErroneous) const {
19822044
// We need to have experimental string processing enabled, and have the
19832045
// parsing logic for regex literals available.
19842046
if (!LangOpts.EnableExperimentalStringProcessing || !regexLiteralLexingFn)
1985-
return false;
2047+
return nullptr;
19862048

1987-
bool MustBeRegex = true;
19882049
bool IsForwardSlash = (*TokStart == '/');
19892050

19902051
// Check if we're able to lex a `/.../` regex.
19912052
if (IsForwardSlash) {
1992-
switch (ForwardSlashRegexMode) {
1993-
case LexerForwardSlashRegexMode::None:
1994-
return false;
1995-
case LexerForwardSlashRegexMode::Tentative:
1996-
MustBeRegex = false;
1997-
break;
1998-
case LexerForwardSlashRegexMode::Always:
1999-
break;
2000-
}
2001-
20022053
// For `/.../` regex literals, we need to ban space and tab at the start of
20032054
// a regex to avoid ambiguity with operator chains, e.g:
20042055
//
@@ -2016,23 +2067,25 @@ bool Lexer::tryLexRegexLiteral(const char *TokStart) {
20162067
case ' ':
20172068
case '\t': {
20182069
if (!MustBeRegex)
2019-
return false;
2070+
return nullptr;
20202071

2021-
// We must have a regex, so emit an error for space and tab.
2022-
StringRef DiagChar;
2023-
switch (*RegexContentStart) {
2024-
case ' ':
2025-
DiagChar = "space";
2026-
break;
2027-
case '\t':
2028-
DiagChar = "tab";
2029-
break;
2030-
default:
2031-
llvm_unreachable("Unhandled case");
2072+
if (Diags) {
2073+
// We must have a regex, so emit an error for space and tab.
2074+
StringRef DiagChar;
2075+
switch (*RegexContentStart) {
2076+
case ' ':
2077+
DiagChar = "space";
2078+
break;
2079+
case '\t':
2080+
DiagChar = "tab";
2081+
break;
2082+
default:
2083+
llvm_unreachable("Unhandled case");
2084+
}
2085+
Diags->diagnose(getSourceLoc(RegexContentStart),
2086+
diag::lex_regex_literal_invalid_starting_char, DiagChar)
2087+
.fixItInsert(getSourceLoc(RegexContentStart), "\\");
20322088
}
2033-
diagnose(RegexContentStart, diag::lex_regex_literal_invalid_starting_char,
2034-
DiagChar)
2035-
.fixItInsert(getSourceLoc(RegexContentStart), "\\");
20362089
break;
20372090
}
20382091
default:
@@ -2045,25 +2098,26 @@ bool Lexer::tryLexRegexLiteral(const char *TokStart) {
20452098
// - CompletelyErroneous will be set if there was an error that cannot be
20462099
// recovered from.
20472100
auto *Ptr = TokStart;
2048-
bool CompletelyErroneous = regexLiteralLexingFn(
2049-
&Ptr, BufferEnd, MustBeRegex,
2050-
getBridgedOptionalDiagnosticEngine(getTokenDiags()));
2101+
CompletelyErroneous = regexLiteralLexingFn(
2102+
&Ptr, BufferEnd, MustBeRegex, getBridgedOptionalDiagnosticEngine(Diags));
20512103

20522104
// If we didn't make any lexing progress, this isn't a regex literal and we
20532105
// should fallback to lexing as something else.
20542106
if (Ptr == TokStart)
2055-
return false;
2107+
return nullptr;
20562108

20572109
// If we're lexing `/.../`, error if we ended on the opening of a comment.
20582110
// We prefer to lex the comment as it's more likely than not that is what
20592111
// the user is expecting.
20602112
// TODO: This should be sunk into the Swift library.
20612113
if (IsForwardSlash && Ptr[-1] == '/' && (*Ptr == '*' || *Ptr == '/')) {
20622114
if (!MustBeRegex)
2063-
return false;
2064-
2065-
diagnose(TokStart, diag::lex_regex_literal_unterminated);
2115+
return nullptr;
20662116

2117+
if (Diags) {
2118+
Diags->diagnose(getSourceLoc(TokStart),
2119+
diag::lex_regex_literal_unterminated);
2120+
}
20672121
// Move the pointer back to the '/' of the comment.
20682122
Ptr--;
20692123
}
@@ -2096,7 +2150,7 @@ bool Lexer::tryLexRegexLiteral(const char *TokStart) {
20962150

20972151
// Invalid, so bail.
20982152
if (GroupDepth == 0)
2099-
return false;
2153+
return nullptr;
21002154

21012155
GroupDepth -= 1;
21022156
break;
@@ -2109,9 +2163,32 @@ bool Lexer::tryLexRegexLiteral(const char *TokStart) {
21092163
}
21102164
}
21112165
}
2166+
assert(Ptr > TokStart && Ptr <= BufferEnd);
2167+
return Ptr;
2168+
}
2169+
2170+
bool Lexer::tryLexRegexLiteral(const char *TokStart) {
2171+
bool IsForwardSlash = (*TokStart == '/');
2172+
bool MustBeRegex = true;
2173+
2174+
if (IsForwardSlash) {
2175+
switch (ForwardSlashRegexMode) {
2176+
case LexerForwardSlashRegexMode::None:
2177+
return false;
2178+
case LexerForwardSlashRegexMode::Tentative:
2179+
MustBeRegex = false;
2180+
break;
2181+
case LexerForwardSlashRegexMode::Always:
2182+
break;
2183+
}
2184+
}
2185+
bool CompletelyErroneous = false;
2186+
auto *Ptr = tryScanRegexLiteral(TokStart, MustBeRegex, getTokenDiags(),
2187+
CompletelyErroneous);
2188+
if (!Ptr)
2189+
return false;
21122190

21132191
// Update to point to where we ended regex lexing.
2114-
assert(Ptr > TokStart && Ptr <= BufferEnd);
21152192
CurPtr = Ptr;
21162193

21172194
// If the lexing was completely erroneous, form an unknown token.

0 commit comments

Comments
 (0)