Skip to content

Commit 1807fb3

Browse files
committed
[Parse] Avoid skipping bodies with /.../ regex literals
While skipping, if we encounter a token that looks like it could be the start of a `/.../` regex literal, fall back to parsing the function or type body normally, as such a token could become a regex literal. As such, it could treat `{` and `}` as literal, or otherwise have contents that would be lexically invalid Swift. To avoid falling back in too many cases, we apply the existing regex literal heuristics. Cases that pass the heuristic fall back to regular parsing. Cases that fail the heuristic are further checked to make sure they wouldn't contain an unbalanced `{` or `}`, but otherwise are allowed to be skipped. This allows us to continue skipping for most occurrences of infix and prefix `/`. This is meant as a lower risk workaround to fix the issue; we ought to go back to handling regex literals in the lexer. Resolves rdar://95354010
1 parent 8a25031 commit 1807fb3

File tree

8 files changed

+389
-51
lines changed

8 files changed

+389
-51
lines changed

include/swift/Parse/Lexer.h

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -580,6 +580,13 @@ class Lexer {
580580
: LexerForwardSlashRegexMode::Tentative) {}
581581
};
582582

583+
/// Checks whether a given token could potentially contain the start of an
584+
/// unskippable `/.../` regex literal. Such tokens need to go through the
585+
/// parser, as they may become regex literal tokens. This includes operator
586+
/// tokens such as `!/` which could be split into prefix `!` on a regex
587+
/// literal.
588+
bool isPotentialUnskippableBareSlashRegexLiteral(const Token &Tok) const;
589+
583590
private:
584591
/// Nul character meaning kind.
585592
enum class NulCharacterKind {

include/swift/Parse/Parser.h

Lines changed: 3 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -715,13 +715,6 @@ class Parser {
715715
/// plain Tok.is(T1) check).
716716
bool skipUntilTokenOrEndOfLine(tok T1, tok T2 = tok::NUM_TOKENS);
717717

718-
/// Skip a braced block (e.g. function body). The current token must be '{'.
719-
/// Returns \c true if the parser hit the eof before finding matched '}'.
720-
///
721-
/// Set \c HasNestedTypeDeclarations to true if a token for a type
722-
/// declaration is detected in the skipped block.
723-
bool skipBracedBlock(bool &HasNestedTypeDeclarations);
724-
725718
/// Skip over SIL decls until we encounter the start of a Swift decl or eof.
726719
void skipSILUntilSwiftDecl();
727720

@@ -1000,6 +993,8 @@ class Parser {
1000993
bool canDelayMemberDeclParsing(bool &HasOperatorDeclarations,
1001994
bool &HasNestedClassDeclarations);
1002995

996+
bool canDelayFunctionBodyParsing(bool &HasNestedTypeDeclarations);
997+
1003998
bool delayParsingDeclList(SourceLoc LBLoc, SourceLoc &RBLoc,
1004999
IterableDeclContext *IDC);
10051000

@@ -1210,9 +1205,7 @@ class Parser {
12101205
bool &hasEffectfulGet,
12111206
AccessorKind currentKind,
12121207
SourceLoc const& currentLoc);
1213-
1214-
void consumeAbstractFunctionBody(AbstractFunctionDecl *AFD,
1215-
const DeclAttributes &Attrs);
1208+
12161209
ParserResult<FuncDecl> parseDeclFunc(SourceLoc StaticLoc,
12171210
StaticSpellingKind StaticSpelling,
12181211
ParseDeclOptions Flags,

lib/Parse/Lexer.cpp

Lines changed: 58 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1972,6 +1972,64 @@ const char *Lexer::findEndOfCurlyQuoteStringLiteral(const char *Body,
19721972
}
19731973
}
19741974

1975+
bool Lexer::isPotentialUnskippableBareSlashRegexLiteral(const Token &Tok) const {
1976+
if (!LangOpts.EnableBareSlashRegexLiterals)
1977+
return false;
1978+
1979+
// A `/.../` regex literal may only start on a binary or prefix operator.
1980+
if (Tok.isNot(tok::oper_prefix, tok::oper_binary_spaced,
1981+
tok::oper_binary_unspaced)) {
1982+
return false;
1983+
}
1984+
auto SlashIdx = Tok.getText().find("/");
1985+
if (SlashIdx == StringRef::npos)
1986+
return false;
1987+
1988+
auto Offset = getBufferPtrForSourceLoc(Tok.getLoc()) + SlashIdx;
1989+
bool CompletelyErroneous;
1990+
if (tryScanRegexLiteral(Offset, /*MustBeRegex*/ false, /*Diags*/ nullptr,
1991+
CompletelyErroneous)) {
1992+
// Definitely a regex literal.
1993+
return true;
1994+
}
1995+
1996+
// A prefix '/' can never be a regex literal if it failed a heuristic.
1997+
if (Tok.is(tok::oper_prefix))
1998+
return false;
1999+
2000+
// We either don't have a regex literal, or we failed a heuristic. We now need
2001+
// to make sure we don't have an unbalanced `{` or `}`, as that would have the
2002+
// potential to change the range of a skipped body if we try to more
2003+
// aggressively lex a regex literal during normal parsing. If we have balanced
2004+
// `{` + `}`, we can proceed with skipping. Worst case scenario is we emit a
2005+
// worse diagnostic.
2006+
// FIXME: We ought to silence lexer diagnostics when skipping, this would
2007+
// avoid emitting a worse diagnostic.
2008+
auto *EndPtr = tryScanRegexLiteral(Offset, /*MustBeRegex*/ true,
2009+
/*Diags*/ nullptr, CompletelyErroneous);
2010+
if (!EndPtr)
2011+
return false;
2012+
2013+
Lexer L(*this, State(Tok.getLoc().getAdvancedLoc(Tok.getLength())),
2014+
State(getSourceLoc(EndPtr)), /*EnableDiagnostics*/ false);
2015+
2016+
unsigned OpenBraces = 0;
2017+
while (L.peekNextToken().isNot(tok::eof)) {
2018+
Token Tok;
2019+
L.lex(Tok);
2020+
if (Tok.is(tok::l_brace))
2021+
OpenBraces += 1;
2022+
if (Tok.is(tok::r_brace)) {
2023+
if (OpenBraces == 0)
2024+
return true;
2025+
OpenBraces -= 1;
2026+
}
2027+
}
2028+
2029+
// If we have an unbalanced `{`, this is unskippable.
2030+
return OpenBraces != 0;
2031+
}
2032+
19752033
const char *Lexer::tryScanRegexLiteral(const char *TokStart, bool MustBeRegex,
19762034
DiagnosticEngine *Diags,
19772035
bool &CompletelyErroneous) const {

lib/Parse/ParseDecl.cpp

Lines changed: 68 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -4119,11 +4119,13 @@ static unsigned skipUntilMatchingRBrace(Parser &P,
41194119
bool &HasPoundDirective,
41204120
bool &HasOperatorDeclarations,
41214121
bool &HasNestedClassDeclarations,
4122-
bool &HasNestedTypeDeclarations) {
4122+
bool &HasNestedTypeDeclarations,
4123+
bool &HasPotentialRegexLiteral) {
41234124
HasPoundDirective = false;
41244125
HasOperatorDeclarations = false;
41254126
HasNestedClassDeclarations = false;
41264127
HasNestedTypeDeclarations = false;
4128+
HasPotentialRegexLiteral = false;
41274129

41284130
unsigned OpenBraces = 1;
41294131

@@ -4146,6 +4148,18 @@ static unsigned skipUntilMatchingRBrace(Parser &P,
41464148
HasNestedTypeDeclarations |= P.Tok.isAny(tok::kw_class, tok::kw_struct,
41474149
tok::kw_enum);
41484150

4151+
// HACK: Bail if we encounter what could potentially be a regex literal.
4152+
// This is necessary as:
4153+
// - We might encounter an invalid Swift token that might be valid in a
4154+
// regex.
4155+
// - Such a literal could contain a literal `}`, which should not be treated
4156+
// as an end brace.
4157+
// FIXME: We should be able to handle `/.../` regex literals in the lexer.
4158+
if (P.L->isPotentialUnskippableBareSlashRegexLiteral(P.Tok)) {
4159+
HasPotentialRegexLiteral = true;
4160+
return OpenBraces;
4161+
}
4162+
41494163
if (P.consumeIf(tok::l_brace)) {
41504164
++OpenBraces;
41514165
continue;
@@ -5452,12 +5466,14 @@ bool Parser::canDelayMemberDeclParsing(bool &HasOperatorDeclarations,
54525466
CancellableBacktrackingScope BackTrack(*this);
54535467
bool HasPoundDirective;
54545468
bool HasNestedTypeDeclarations;
5469+
bool HasPotentialRegexLiteral;
54555470
skipUntilMatchingRBrace(*this,
54565471
HasPoundDirective,
54575472
HasOperatorDeclarations,
54585473
HasNestedClassDeclarations,
5459-
HasNestedTypeDeclarations);
5460-
if (!HasPoundDirective)
5474+
HasNestedTypeDeclarations,
5475+
HasPotentialRegexLiteral);
5476+
if (!HasPoundDirective && !HasPotentialRegexLiteral)
54615477
BackTrack.cancelBacktrack();
54625478
return !BackTrack.willBacktrack();
54635479
}
@@ -6133,25 +6149,31 @@ static ParameterList *parseOptionalAccessorArgument(SourceLoc SpecifierLoc,
61336149
return ParameterList::create(P.Context, StartLoc, param, EndLoc);
61346150
}
61356151

6136-
bool Parser::skipBracedBlock(bool &HasNestedTypeDeclarations) {
6152+
bool Parser::canDelayFunctionBodyParsing(bool &HasNestedTypeDeclarations) {
6153+
// If explicitly disabled, respect the flag.
6154+
if (!isDelayedParsingEnabled() && !isCodeCompletionFirstPass())
6155+
return false;
6156+
61376157
SyntaxParsingContext disabled(SyntaxContext);
61386158
SyntaxContext->disable();
6139-
consumeToken(tok::l_brace);
61406159

6141-
// We don't care if a skipped function body contained any of these, so
6142-
// just ignore them.
6160+
// Skip until the matching right curly bracket; If it has a potential regex
6161+
// literal, we can't skip. We don't care about the others, so just ignore them.
6162+
CancellableBacktrackingScope BackTrack(*this);
6163+
consumeToken(tok::l_brace);
61436164
bool HasPoundDirectives;
61446165
bool HasOperatorDeclarations;
61456166
bool HasNestedClassDeclarations;
6167+
bool HasPotentialRegexLiteral;
6168+
skipUntilMatchingRBrace(*this, HasPoundDirectives, HasOperatorDeclarations,
6169+
HasNestedClassDeclarations, HasNestedTypeDeclarations,
6170+
HasPotentialRegexLiteral);
6171+
if (HasPotentialRegexLiteral)
6172+
return false;
61466173

6147-
unsigned OpenBraces = skipUntilMatchingRBrace(*this,
6148-
HasPoundDirectives,
6149-
HasOperatorDeclarations,
6150-
HasNestedClassDeclarations,
6151-
HasNestedTypeDeclarations);
6152-
if (consumeIf(tok::r_brace))
6153-
--OpenBraces;
6154-
return OpenBraces != 0;
6174+
BackTrack.cancelBacktrack();
6175+
consumeIf(tok::r_brace);
6176+
return true;
61556177
}
61566178

61576179
void Parser::skipSILUntilSwiftDecl() {
@@ -7136,30 +7158,6 @@ Parser::parseDeclVar(ParseDeclOptions Flags,
71367158
return makeResult(Status);
71377159
}
71387160

7139-
void Parser::consumeAbstractFunctionBody(AbstractFunctionDecl *AFD,
7140-
const DeclAttributes &Attrs) {
7141-
auto BeginParserPosition = getParserPosition();
7142-
SourceRange BodyRange;
7143-
BodyRange.Start = Tok.getLoc();
7144-
7145-
// Advance the parser to the end of the block; '{' ... '}'.
7146-
bool HasNestedTypeDeclarations;
7147-
skipBracedBlock(HasNestedTypeDeclarations);
7148-
7149-
BodyRange.End = PreviousLoc;
7150-
7151-
AFD->setBodyDelayed(BodyRange);
7152-
AFD->setHasNestedTypeDeclarations(HasNestedTypeDeclarations);
7153-
7154-
if (isCodeCompletionFirstPass() &&
7155-
SourceMgr.rangeContainsCodeCompletionLoc(BodyRange)) {
7156-
State->setCodeCompletionDelayedDeclState(
7157-
SourceMgr, L->getBufferID(),
7158-
CodeCompletionDelayedDeclKind::FunctionBody,
7159-
PD_Default, AFD, BodyRange, BeginParserPosition.PreviousLoc);
7160-
}
7161-
}
7162-
71637161
/// Parse a 'func' declaration, returning null on error. The caller
71647162
/// handles this case and does recovery as appropriate.
71657163
///
@@ -7472,12 +7470,41 @@ void Parser::parseAbstractFunctionBody(AbstractFunctionDecl *AFD) {
74727470
// If we can delay parsing this body, or this is the first pass of code
74737471
// completion, skip until the end. If we encounter a code completion token
74747472
// while skipping, we'll make a note of it.
7475-
if (isDelayedParsingEnabled() || isCodeCompletionFirstPass()) {
7476-
consumeAbstractFunctionBody(AFD, AFD->getAttrs());
7473+
auto BodyPreviousLoc = PreviousLoc;
7474+
SourceRange BodyRange(Tok.getLoc());
7475+
auto setCodeCompletionDelayedDeclStateIfNeeded = [&] {
7476+
if (!isCodeCompletionFirstPass() ||
7477+
!SourceMgr.rangeContainsCodeCompletionLoc(BodyRange)) {
7478+
return;
7479+
}
7480+
if (State->hasCodeCompletionDelayedDeclState())
7481+
State->takeCodeCompletionDelayedDeclState();
7482+
State->setCodeCompletionDelayedDeclState(
7483+
SourceMgr, L->getBufferID(),
7484+
CodeCompletionDelayedDeclKind::FunctionBody,
7485+
PD_Default, AFD, BodyRange, BodyPreviousLoc);
7486+
};
7487+
7488+
bool HasNestedTypeDeclarations;
7489+
if (canDelayFunctionBodyParsing(HasNestedTypeDeclarations)) {
7490+
BodyRange.End = PreviousLoc;
7491+
7492+
assert(SourceMgr.isBeforeInBuffer(BodyRange.Start, BodyRange.End) ||
7493+
BodyRange.Start == BodyRange.End &&
7494+
"At least '{' should be consumed");
7495+
7496+
AFD->setBodyDelayed(BodyRange);
7497+
AFD->setHasNestedTypeDeclarations(HasNestedTypeDeclarations);
7498+
7499+
setCodeCompletionDelayedDeclStateIfNeeded();
74777500
return;
74787501
}
74797502

74807503
(void)parseAbstractFunctionBodyImpl(AFD);
7504+
assert(BodyRange.Start == AFD->getBodySourceRange().Start &&
7505+
"The start of the body should be the 'l_brace' token above");
7506+
BodyRange = AFD->getBodySourceRange();
7507+
setCodeCompletionDelayedDeclStateIfNeeded();
74817508
}
74827509

74837510
BodyAndFingerprint

lib/Parse/ParseExpr.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -886,6 +886,8 @@ void Parser::tryLexRegexLiteral(bool forUnappliedOperator) {
886886

887887
// Check to see if we have a regex literal `/.../`, optionally with a prefix
888888
// operator e.g `!/.../`.
889+
// NOTE: If you change this logic you must also change the logic in
890+
// isPotentialUnskippableBareSlashRegexLiteral.
889891
bool mustBeRegex = false;
890892
switch (Tok.getKind()) {
891893
case tok::oper_prefix:
Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,51 @@
1+
// RUN: %empty-directory(%t)
2+
3+
// RUN: %target-swift-frontend -parse -enable-bare-slash-regex -disable-availability-checking -experimental-skip-all-function-bodies -stats-output-dir %t %s
4+
// RUN: %{python} %utils/process-stats-dir.py --set-csv-baseline %t/stats.csv %t
5+
// RUN: %FileCheck -input-file %t/stats.csv %s
6+
7+
// REQUIRES: swift_in_compiler
8+
9+
// Make sure we can skip in all of the below cases.
10+
11+
// We don't appear to output a stats entry when it is 0.
12+
// CHECK-NOT: {{"Parse.NumFunctionsParsed"}}
13+
14+
// Balanced `{}`, so okay.
15+
func a() { / {}/ }
16+
func b() { / \{}/ }
17+
func c() { / {"{"}/ }
18+
19+
// Some cases of infix '/' that we should continue to skip.
20+
func d() {
21+
_ = 1 / 2 + 3 * 4
22+
_ = 1 / 2 / 3 / 4
23+
}
24+
func e() {
25+
let arr = [1, 2, 3]
26+
_ = arr.reduce(0, /) / 2
27+
28+
func foo(_ i: Int, _ fn: () -> Void) {}
29+
foo(1 / 2 / 3, { print("}}}{{{") })
30+
}
31+
32+
// Some cases of prefix '/' that we should continue to skip.
33+
prefix operator /
34+
prefix func / <T> (_ x: T) -> T { x }
35+
36+
enum E {
37+
case e
38+
func foo<T>(_ x: T) {}
39+
}
40+
41+
func f() {
42+
_ = /E.e
43+
(/E.e).foo(/0)
44+
45+
func foo<T, U>(_ x: T, _ y: U) {}
46+
foo((/E.e), /E.e)
47+
foo((/)(E.e), /E.e)
48+
49+
func bar<T>(_ x: T) -> Int { 0 }
50+
_ = bar(/E.e) / 2
51+
}

0 commit comments

Comments
 (0)