Skip to content

Commit 700f776

Browse files
authored
Merge pull request #59642 from hamishknight/no-skip-slash-5.7
2 parents fd0d6d9 + 1807fb3 commit 700f776

File tree

9 files changed

+459
-93
lines changed

9 files changed

+459
-93
lines changed

include/swift/AST/DiagnosticEngine.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1390,7 +1390,7 @@ namespace swift {
13901390
DiagnosticEngine &getDiags() { return QueueEngine; }
13911391

13921392
/// Retrieve the underlying engine which will receive the diagnostics.
1393-
DiagnosticEngine &getUnderlyingDiags() { return UnderlyingEngine; }
1393+
DiagnosticEngine &getUnderlyingDiags() const { return UnderlyingEngine; }
13941394

13951395
/// Clear this queue and erase all diagnostics recorded.
13961396
void clear() {

include/swift/Parse/Lexer.h

Lines changed: 18 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -180,7 +180,7 @@ class Lexer {
180180
/// Retrieve the underlying diagnostic engine we emit diagnostics to. Note
181181
/// this should only be used for diagnostics not concerned with the current
182182
/// token.
183-
DiagnosticEngine *getUnderlyingDiags() {
183+
DiagnosticEngine *getUnderlyingDiags() const {
184184
return DiagQueue ? &DiagQueue->getUnderlyingDiags() : nullptr;
185185
}
186186

@@ -218,7 +218,10 @@ class Lexer {
218218
/// \param Parent the parent lexer that scans the whole buffer
219219
/// \param BeginState start of the subrange
220220
/// \param EndState end of the subrange
221-
Lexer(Lexer &Parent, State BeginState, State EndState);
221+
/// \param EnableDiagnostics Whether to inherit the diagnostic engine of
222+
/// \p Parent. If \c false, diagnostics will be disabled.
223+
Lexer(const Lexer &Parent, State BeginState, State EndState,
224+
bool EnableDiagnostics = true);
222225

223226
/// Returns true if this lexer will produce a code completion token.
224227
bool isCodeCompletion() const {
@@ -577,6 +580,13 @@ class Lexer {
577580
: LexerForwardSlashRegexMode::Tentative) {}
578581
};
579582

583+
/// Checks whether a given token could potentially contain the start of an
584+
/// unskippable `/.../` regex literal. Such tokens need to go through the
585+
/// parser, as they may become regex literal tokens. This includes operator
586+
/// tokens such as `!/` which could be split into prefix `!` on a regex
587+
/// literal.
588+
bool isPotentialUnskippableBareSlashRegexLiteral(const Token &Tok) const;
589+
580590
private:
581591
/// Nul character meaning kind.
582592
enum class NulCharacterKind {
@@ -641,6 +651,12 @@ class Lexer {
641651
void lexStringLiteral(unsigned CustomDelimiterLen = 0);
642652
void lexEscapedIdentifier();
643653

654+
/// Attempt to scan a regex literal, returning the end pointer, or `nullptr`
655+
/// if a regex literal cannot be scanned.
656+
const char *tryScanRegexLiteral(const char *TokStart, bool MustBeRegex,
657+
DiagnosticEngine *Diags,
658+
bool &CompletelyErroneous) const;
659+
644660
/// Attempt to lex a regex literal, returning true if lexing should continue,
645661
/// false if this is not a regex literal.
646662
bool tryLexRegexLiteral(const char *TokStart);

include/swift/Parse/Parser.h

Lines changed: 3 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -715,13 +715,6 @@ class Parser {
715715
/// plain Tok.is(T1) check).
716716
bool skipUntilTokenOrEndOfLine(tok T1, tok T2 = tok::NUM_TOKENS);
717717

718-
/// Skip a braced block (e.g. function body). The current token must be '{'.
719-
/// Returns \c true if the parser hit the eof before finding matched '}'.
720-
///
721-
/// Set \c HasNestedTypeDeclarations to true if a token for a type
722-
/// declaration is detected in the skipped block.
723-
bool skipBracedBlock(bool &HasNestedTypeDeclarations);
724-
725718
/// Skip over SIL decls until we encounter the start of a Swift decl or eof.
726719
void skipSILUntilSwiftDecl();
727720

@@ -1000,6 +993,8 @@ class Parser {
1000993
bool canDelayMemberDeclParsing(bool &HasOperatorDeclarations,
1001994
bool &HasNestedClassDeclarations);
1002995

996+
bool canDelayFunctionBodyParsing(bool &HasNestedTypeDeclarations);
997+
1003998
bool delayParsingDeclList(SourceLoc LBLoc, SourceLoc &RBLoc,
1004999
IterableDeclContext *IDC);
10051000

@@ -1210,9 +1205,7 @@ class Parser {
12101205
bool &hasEffectfulGet,
12111206
AccessorKind currentKind,
12121207
SourceLoc const& currentLoc);
1213-
1214-
void consumeAbstractFunctionBody(AbstractFunctionDecl *AFD,
1215-
const DeclAttributes &Attrs);
1208+
12161209
ParserResult<FuncDecl> parseDeclFunc(SourceLoc StaticLoc,
12171210
StaticSpellingKind StaticSpelling,
12181211
ParseDeclOptions Flags,

lib/Parse/Lexer.cpp

Lines changed: 116 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -246,9 +246,11 @@ Lexer::Lexer(const LangOptions &Options, const SourceManager &SourceMgr,
246246
initialize(Offset, EndOffset);
247247
}
248248

249-
Lexer::Lexer(Lexer &Parent, State BeginState, State EndState)
249+
Lexer::Lexer(const Lexer &Parent, State BeginState, State EndState,
250+
bool EnableDiagnostics)
250251
: Lexer(PrincipalTag(), Parent.LangOpts, Parent.SourceMgr, Parent.BufferID,
251-
Parent.getUnderlyingDiags(), Parent.LexMode,
252+
EnableDiagnostics ? Parent.getUnderlyingDiags() : nullptr,
253+
Parent.LexMode,
252254
Parent.IsHashbangAllowed
253255
? HashbangMode::Allowed
254256
: HashbangMode::Disallowed,
@@ -1970,27 +1972,76 @@ const char *Lexer::findEndOfCurlyQuoteStringLiteral(const char *Body,
19701972
}
19711973
}
19721974

1973-
bool Lexer::tryLexRegexLiteral(const char *TokStart) {
1975+
bool Lexer::isPotentialUnskippableBareSlashRegexLiteral(const Token &Tok) const {
1976+
if (!LangOpts.EnableBareSlashRegexLiterals)
1977+
return false;
1978+
1979+
// A `/.../` regex literal may only start on a binary or prefix operator.
1980+
if (Tok.isNot(tok::oper_prefix, tok::oper_binary_spaced,
1981+
tok::oper_binary_unspaced)) {
1982+
return false;
1983+
}
1984+
auto SlashIdx = Tok.getText().find("/");
1985+
if (SlashIdx == StringRef::npos)
1986+
return false;
1987+
1988+
auto Offset = getBufferPtrForSourceLoc(Tok.getLoc()) + SlashIdx;
1989+
bool CompletelyErroneous;
1990+
if (tryScanRegexLiteral(Offset, /*MustBeRegex*/ false, /*Diags*/ nullptr,
1991+
CompletelyErroneous)) {
1992+
// Definitely a regex literal.
1993+
return true;
1994+
}
1995+
1996+
// A prefix '/' can never be a regex literal if it failed a heuristic.
1997+
if (Tok.is(tok::oper_prefix))
1998+
return false;
1999+
2000+
// We either don't have a regex literal, or we failed a heuristic. We now need
2001+
// to make sure we don't have an unbalanced `{` or `}`, as that would have the
2002+
// potential to change the range of a skipped body if we try to more
2003+
// aggressively lex a regex literal during normal parsing. If we have balanced
2004+
// `{` + `}`, we can proceed with skipping. Worst case scenario is we emit a
2005+
// worse diagnostic.
2006+
// FIXME: We ought to silence lexer diagnostics when skipping, this would
2007+
// avoid emitting a worse diagnostic.
2008+
auto *EndPtr = tryScanRegexLiteral(Offset, /*MustBeRegex*/ true,
2009+
/*Diags*/ nullptr, CompletelyErroneous);
2010+
if (!EndPtr)
2011+
return false;
2012+
2013+
Lexer L(*this, State(Tok.getLoc().getAdvancedLoc(Tok.getLength())),
2014+
State(getSourceLoc(EndPtr)), /*EnableDiagnostics*/ false);
2015+
2016+
unsigned OpenBraces = 0;
2017+
while (L.peekNextToken().isNot(tok::eof)) {
2018+
Token Tok;
2019+
L.lex(Tok);
2020+
if (Tok.is(tok::l_brace))
2021+
OpenBraces += 1;
2022+
if (Tok.is(tok::r_brace)) {
2023+
if (OpenBraces == 0)
2024+
return true;
2025+
OpenBraces -= 1;
2026+
}
2027+
}
2028+
2029+
// If we have an unbalanced `{`, this is unskippable.
2030+
return OpenBraces != 0;
2031+
}
2032+
2033+
const char *Lexer::tryScanRegexLiteral(const char *TokStart, bool MustBeRegex,
2034+
DiagnosticEngine *Diags,
2035+
bool &CompletelyErroneous) const {
19742036
// We need to have experimental string processing enabled, and have the
19752037
// parsing logic for regex literals available.
19762038
if (!LangOpts.EnableExperimentalStringProcessing || !regexLiteralLexingFn)
1977-
return false;
2039+
return nullptr;
19782040

1979-
bool MustBeRegex = true;
19802041
bool IsForwardSlash = (*TokStart == '/');
19812042

19822043
// Check if we're able to lex a `/.../` regex.
19832044
if (IsForwardSlash) {
1984-
switch (ForwardSlashRegexMode) {
1985-
case LexerForwardSlashRegexMode::None:
1986-
return false;
1987-
case LexerForwardSlashRegexMode::Tentative:
1988-
MustBeRegex = false;
1989-
break;
1990-
case LexerForwardSlashRegexMode::Always:
1991-
break;
1992-
}
1993-
19942045
// For `/.../` regex literals, we need to ban space and tab at the start of
19952046
// a regex to avoid ambiguity with operator chains, e.g:
19962047
//
@@ -2008,23 +2059,25 @@ bool Lexer::tryLexRegexLiteral(const char *TokStart) {
20082059
case ' ':
20092060
case '\t': {
20102061
if (!MustBeRegex)
2011-
return false;
2062+
return nullptr;
20122063

2013-
// We must have a regex, so emit an error for space and tab.
2014-
StringRef DiagChar;
2015-
switch (*RegexContentStart) {
2016-
case ' ':
2017-
DiagChar = "space";
2018-
break;
2019-
case '\t':
2020-
DiagChar = "tab";
2021-
break;
2022-
default:
2023-
llvm_unreachable("Unhandled case");
2064+
if (Diags) {
2065+
// We must have a regex, so emit an error for space and tab.
2066+
StringRef DiagChar;
2067+
switch (*RegexContentStart) {
2068+
case ' ':
2069+
DiagChar = "space";
2070+
break;
2071+
case '\t':
2072+
DiagChar = "tab";
2073+
break;
2074+
default:
2075+
llvm_unreachable("Unhandled case");
2076+
}
2077+
Diags->diagnose(getSourceLoc(RegexContentStart),
2078+
diag::lex_regex_literal_invalid_starting_char, DiagChar)
2079+
.fixItInsert(getSourceLoc(RegexContentStart), "\\");
20242080
}
2025-
diagnose(RegexContentStart, diag::lex_regex_literal_invalid_starting_char,
2026-
DiagChar)
2027-
.fixItInsert(getSourceLoc(RegexContentStart), "\\");
20282081
break;
20292082
}
20302083
default:
@@ -2037,25 +2090,26 @@ bool Lexer::tryLexRegexLiteral(const char *TokStart) {
20372090
// - CompletelyErroneous will be set if there was an error that cannot be
20382091
// recovered from.
20392092
auto *Ptr = TokStart;
2040-
bool CompletelyErroneous = regexLiteralLexingFn(
2041-
&Ptr, BufferEnd, MustBeRegex,
2042-
getBridgedOptionalDiagnosticEngine(getTokenDiags()));
2093+
CompletelyErroneous = regexLiteralLexingFn(
2094+
&Ptr, BufferEnd, MustBeRegex, getBridgedOptionalDiagnosticEngine(Diags));
20432095

20442096
// If we didn't make any lexing progress, this isn't a regex literal and we
20452097
// should fall back to lexing as something else.
20462098
if (Ptr == TokStart)
2047-
return false;
2099+
return nullptr;
20482100

20492101
// If we're lexing `/.../`, error if we ended on the opening of a comment.
20502102
// We prefer to lex the comment as it's more likely than not that is what
20512103
// the user is expecting.
20522104
// TODO: This should be sunk into the Swift library.
20532105
if (IsForwardSlash && Ptr[-1] == '/' && (*Ptr == '*' || *Ptr == '/')) {
20542106
if (!MustBeRegex)
2055-
return false;
2056-
2057-
diagnose(TokStart, diag::lex_regex_literal_unterminated);
2107+
return nullptr;
20582108

2109+
if (Diags) {
2110+
Diags->diagnose(getSourceLoc(TokStart),
2111+
diag::lex_regex_literal_unterminated);
2112+
}
20592113
// Move the pointer back to the '/' of the comment.
20602114
Ptr--;
20612115
}
@@ -2088,7 +2142,7 @@ bool Lexer::tryLexRegexLiteral(const char *TokStart) {
20882142

20892143
// Invalid, so bail.
20902144
if (GroupDepth == 0)
2091-
return false;
2145+
return nullptr;
20922146

20932147
GroupDepth -= 1;
20942148
break;
@@ -2101,9 +2155,32 @@ bool Lexer::tryLexRegexLiteral(const char *TokStart) {
21012155
}
21022156
}
21032157
}
2158+
assert(Ptr > TokStart && Ptr <= BufferEnd);
2159+
return Ptr;
2160+
}
2161+
2162+
bool Lexer::tryLexRegexLiteral(const char *TokStart) {
2163+
bool IsForwardSlash = (*TokStart == '/');
2164+
bool MustBeRegex = true;
2165+
2166+
if (IsForwardSlash) {
2167+
switch (ForwardSlashRegexMode) {
2168+
case LexerForwardSlashRegexMode::None:
2169+
return false;
2170+
case LexerForwardSlashRegexMode::Tentative:
2171+
MustBeRegex = false;
2172+
break;
2173+
case LexerForwardSlashRegexMode::Always:
2174+
break;
2175+
}
2176+
}
2177+
bool CompletelyErroneous = false;
2178+
auto *Ptr = tryScanRegexLiteral(TokStart, MustBeRegex, getTokenDiags(),
2179+
CompletelyErroneous);
2180+
if (!Ptr)
2181+
return false;
21042182

21052183
// Update to point to where we ended regex lexing.
2106-
assert(Ptr > TokStart && Ptr <= BufferEnd);
21072184
CurPtr = Ptr;
21082185

21092186
// If the lexing was completely erroneous, form an unknown token.

0 commit comments

Comments
 (0)