Skip to content

Commit 515945f

Browse files
committed
[Parse] Avoid skipping bodies with /.../ regex literals
While skipping, if we encounter a token that looks like it could be the start of a `/.../` regex literal, fall back to parsing the function or type body normally, as such a token could become a regex literal. Such a literal could treat `{` and `}` as literal characters, or otherwise have contents that would be lexically invalid Swift. To avoid falling back in too many cases, we apply the existing regex literal heuristics. Cases that pass the heuristic fall back to regular parsing. Cases that fail the heuristic are further checked to make sure they wouldn't contain an unbalanced `{` or `}`, but otherwise are allowed to be skipped. This allows us to continue skipping for most occurrences of infix and prefix `/`. This is meant as a lower-risk workaround to fix the issue; we ought to go back to handling regex literals in the lexer. Resolves rdar://95354010
1 parent 325ba43 commit 515945f

File tree

8 files changed

+402
-51
lines changed

8 files changed

+402
-51
lines changed

include/swift/Parse/Lexer.h

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -580,6 +580,13 @@ class Lexer {
580580
: LexerForwardSlashRegexMode::Tentative) {}
581581
};
582582

583+
/// Checks whether a given token could potentially contain the start of an
584+
/// unskippable `/.../` regex literal. Such tokens need to go through the
585+
/// parser, as they may become regex literal tokens. This includes operator
586+
/// tokens such as `!/` which could be split into prefix `!` on a regex
587+
/// literal.
588+
bool isPotentialUnskippableBareSlashRegexLiteral(const Token &Tok) const;
589+
583590
private:
584591
/// Nul character meaning kind.
585592
enum class NulCharacterKind {

include/swift/Parse/Parser.h

Lines changed: 3 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -718,13 +718,6 @@ class Parser {
718718
/// plain Tok.is(T1) check).
719719
bool skipUntilTokenOrEndOfLine(tok T1, tok T2 = tok::NUM_TOKENS);
720720

721-
/// Skip a braced block (e.g. function body). The current token must be '{'.
722-
/// Returns \c true if the parser hit the eof before finding matched '}'.
723-
///
724-
/// Set \c HasNestedTypeDeclarations to true if a token for a type
725-
/// declaration is detected in the skipped block.
726-
bool skipBracedBlock(bool &HasNestedTypeDeclarations);
727-
728721
/// Skip over SIL decls until we encounter the start of a Swift decl or eof.
729722
void skipSILUntilSwiftDecl();
730723

@@ -1001,6 +994,8 @@ class Parser {
1001994
bool canDelayMemberDeclParsing(bool &HasOperatorDeclarations,
1002995
bool &HasNestedClassDeclarations);
1003996

997+
bool canDelayFunctionBodyParsing(bool &HasNestedTypeDeclarations);
998+
1004999
bool delayParsingDeclList(SourceLoc LBLoc, SourceLoc &RBLoc,
10051000
IterableDeclContext *IDC);
10061001

@@ -1211,9 +1206,7 @@ class Parser {
12111206
bool &hasEffectfulGet,
12121207
AccessorKind currentKind,
12131208
SourceLoc const& currentLoc);
1214-
1215-
void consumeAbstractFunctionBody(AbstractFunctionDecl *AFD,
1216-
const DeclAttributes &Attrs);
1209+
12171210
ParserResult<FuncDecl> parseDeclFunc(SourceLoc StaticLoc,
12181211
StaticSpellingKind StaticSpelling,
12191212
ParseDeclOptions Flags,

lib/Parse/Lexer.cpp

Lines changed: 58 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1980,6 +1980,64 @@ const char *Lexer::findEndOfCurlyQuoteStringLiteral(const char *Body,
19801980
}
19811981
}
19821982

1983+
bool Lexer::isPotentialUnskippableBareSlashRegexLiteral(const Token &Tok) const {
1984+
if (!LangOpts.hasFeature(Feature::BareSlashRegexLiterals))
1985+
return false;
1986+
1987+
// A `/.../` regex literal may only start on a binary or prefix operator.
1988+
if (Tok.isNot(tok::oper_prefix, tok::oper_binary_spaced,
1989+
tok::oper_binary_unspaced)) {
1990+
return false;
1991+
}
1992+
auto SlashIdx = Tok.getText().find("/");
1993+
if (SlashIdx == StringRef::npos)
1994+
return false;
1995+
1996+
auto Offset = getBufferPtrForSourceLoc(Tok.getLoc()) + SlashIdx;
1997+
bool CompletelyErroneous;
1998+
if (tryScanRegexLiteral(Offset, /*MustBeRegex*/ false, /*Diags*/ nullptr,
1999+
CompletelyErroneous)) {
2000+
// Definitely a regex literal.
2001+
return true;
2002+
}
2003+
2004+
// A prefix '/' can never be a regex literal if it failed a heuristic.
2005+
if (Tok.is(tok::oper_prefix))
2006+
return false;
2007+
2008+
// We either don't have a regex literal, or we failed a heuristic. We now need
2009+
// to make sure we don't have an unbalanced `{` or `}`, as that would have the
2010+
// potential to change the range of a skipped body if we try to more
2011+
// aggressively lex a regex literal during normal parsing. If we have balanced
2012+
// `{` + `}`, we can proceed with skipping. Worst case scenario is we emit a
2013+
// worse diagnostic.
2014+
// FIXME: We ought to silence lexer diagnostics when skipping, this would
2015+
// avoid emitting a worse diagnostic.
2016+
auto *EndPtr = tryScanRegexLiteral(Offset, /*MustBeRegex*/ true,
2017+
/*Diags*/ nullptr, CompletelyErroneous);
2018+
if (!EndPtr)
2019+
return false;
2020+
2021+
Lexer L(*this, State(Tok.getLoc().getAdvancedLoc(Tok.getLength())),
2022+
State(getSourceLoc(EndPtr)), /*EnableDiagnostics*/ false);
2023+
2024+
unsigned OpenBraces = 0;
2025+
while (L.peekNextToken().isNot(tok::eof)) {
2026+
Token Tok;
2027+
L.lex(Tok);
2028+
if (Tok.is(tok::l_brace))
2029+
OpenBraces += 1;
2030+
if (Tok.is(tok::r_brace)) {
2031+
if (OpenBraces == 0)
2032+
return true;
2033+
OpenBraces -= 1;
2034+
}
2035+
}
2036+
2037+
// If we have an unbalanced `{`, this is unskippable.
2038+
return OpenBraces != 0;
2039+
}
2040+
19832041
const char *Lexer::tryScanRegexLiteral(const char *TokStart, bool MustBeRegex,
19842042
DiagnosticEngine *Diags,
19852043
bool &CompletelyErroneous) const {

lib/Parse/ParseDecl.cpp

Lines changed: 68 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -4121,11 +4121,13 @@ static unsigned skipUntilMatchingRBrace(Parser &P,
41214121
bool &HasPoundDirective,
41224122
bool &HasOperatorDeclarations,
41234123
bool &HasNestedClassDeclarations,
4124-
bool &HasNestedTypeDeclarations) {
4124+
bool &HasNestedTypeDeclarations,
4125+
bool &HasPotentialRegexLiteral) {
41254126
HasPoundDirective = false;
41264127
HasOperatorDeclarations = false;
41274128
HasNestedClassDeclarations = false;
41284129
HasNestedTypeDeclarations = false;
4130+
HasPotentialRegexLiteral = false;
41294131

41304132
unsigned OpenBraces = 1;
41314133

@@ -4148,6 +4150,18 @@ static unsigned skipUntilMatchingRBrace(Parser &P,
41484150
HasNestedTypeDeclarations |= P.Tok.isAny(tok::kw_class, tok::kw_struct,
41494151
tok::kw_enum);
41504152

4153+
// HACK: Bail if we encounter what could potentially be a regex literal.
4154+
// This is necessary as:
4155+
// - We might encounter an invalid Swift token that might be valid in a
4156+
// regex.
4157+
// - Such a literal could contain a literal `}`, which should not be treated
4158+
// as an end brace.
4159+
// FIXME: We should be able to handle `/.../` regex literals in the lexer.
4160+
if (P.L->isPotentialUnskippableBareSlashRegexLiteral(P.Tok)) {
4161+
HasPotentialRegexLiteral = true;
4162+
return OpenBraces;
4163+
}
4164+
41514165
if (P.consumeIf(tok::l_brace)) {
41524166
++OpenBraces;
41534167
continue;
@@ -5456,12 +5470,14 @@ bool Parser::canDelayMemberDeclParsing(bool &HasOperatorDeclarations,
54565470
CancellableBacktrackingScope BackTrack(*this);
54575471
bool HasPoundDirective;
54585472
bool HasNestedTypeDeclarations;
5473+
bool HasPotentialRegexLiteral;
54595474
skipUntilMatchingRBrace(*this,
54605475
HasPoundDirective,
54615476
HasOperatorDeclarations,
54625477
HasNestedClassDeclarations,
5463-
HasNestedTypeDeclarations);
5464-
if (!HasPoundDirective)
5478+
HasNestedTypeDeclarations,
5479+
HasPotentialRegexLiteral);
5480+
if (!HasPoundDirective && !HasPotentialRegexLiteral)
54655481
BackTrack.cancelBacktrack();
54665482
return !BackTrack.willBacktrack();
54675483
}
@@ -6137,25 +6153,31 @@ static ParameterList *parseOptionalAccessorArgument(SourceLoc SpecifierLoc,
61376153
return ParameterList::create(P.Context, StartLoc, param, EndLoc);
61386154
}
61396155

6140-
bool Parser::skipBracedBlock(bool &HasNestedTypeDeclarations) {
6156+
bool Parser::canDelayFunctionBodyParsing(bool &HasNestedTypeDeclarations) {
6157+
// If explicitly disabled, respect the flag.
6158+
if (!isDelayedParsingEnabled() && !isCodeCompletionFirstPass())
6159+
return false;
6160+
61416161
SyntaxParsingContext disabled(SyntaxContext);
61426162
SyntaxContext->disable();
6143-
consumeToken(tok::l_brace);
61446163

6145-
// We don't care if a skipped function body contained any of these, so
6146-
// just ignore them.
6164+
// Skip until the matching right curly bracket; If it has a potential regex
6165+
// literal, we can't skip. We don't care about the others, so just ignore them.
6166+
CancellableBacktrackingScope BackTrack(*this);
6167+
consumeToken(tok::l_brace);
61476168
bool HasPoundDirectives;
61486169
bool HasOperatorDeclarations;
61496170
bool HasNestedClassDeclarations;
6171+
bool HasPotentialRegexLiteral;
6172+
skipUntilMatchingRBrace(*this, HasPoundDirectives, HasOperatorDeclarations,
6173+
HasNestedClassDeclarations, HasNestedTypeDeclarations,
6174+
HasPotentialRegexLiteral);
6175+
if (HasPotentialRegexLiteral)
6176+
return false;
61506177

6151-
unsigned OpenBraces = skipUntilMatchingRBrace(*this,
6152-
HasPoundDirectives,
6153-
HasOperatorDeclarations,
6154-
HasNestedClassDeclarations,
6155-
HasNestedTypeDeclarations);
6156-
if (consumeIf(tok::r_brace))
6157-
--OpenBraces;
6158-
return OpenBraces != 0;
6178+
BackTrack.cancelBacktrack();
6179+
consumeIf(tok::r_brace);
6180+
return true;
61596181
}
61606182

61616183
void Parser::skipSILUntilSwiftDecl() {
@@ -7145,30 +7167,6 @@ Parser::parseDeclVar(ParseDeclOptions Flags,
71457167
return makeResult(Status);
71467168
}
71477169

7148-
void Parser::consumeAbstractFunctionBody(AbstractFunctionDecl *AFD,
7149-
const DeclAttributes &Attrs) {
7150-
auto BeginParserPosition = getParserPosition();
7151-
SourceRange BodyRange;
7152-
BodyRange.Start = Tok.getLoc();
7153-
7154-
// Advance the parser to the end of the block; '{' ... '}'.
7155-
bool HasNestedTypeDeclarations;
7156-
skipBracedBlock(HasNestedTypeDeclarations);
7157-
7158-
BodyRange.End = PreviousLoc;
7159-
7160-
AFD->setBodyDelayed(BodyRange);
7161-
AFD->setHasNestedTypeDeclarations(HasNestedTypeDeclarations);
7162-
7163-
if (isCodeCompletionFirstPass() &&
7164-
SourceMgr.rangeContainsCodeCompletionLoc(BodyRange)) {
7165-
State->setCodeCompletionDelayedDeclState(
7166-
SourceMgr, L->getBufferID(),
7167-
CodeCompletionDelayedDeclKind::FunctionBody,
7168-
PD_Default, AFD, BodyRange, BeginParserPosition.PreviousLoc);
7169-
}
7170-
}
7171-
71727170
/// Parse a 'func' declaration, returning null on error. The caller
71737171
/// handles this case and does recovery as appropriate.
71747172
///
@@ -7481,12 +7479,41 @@ void Parser::parseAbstractFunctionBody(AbstractFunctionDecl *AFD) {
74817479
// If we can delay parsing this body, or this is the first pass of code
74827480
// completion, skip until the end. If we encounter a code completion token
74837481
// while skipping, we'll make a note of it.
7484-
if (isDelayedParsingEnabled() || isCodeCompletionFirstPass()) {
7485-
consumeAbstractFunctionBody(AFD, AFD->getAttrs());
7482+
auto BodyPreviousLoc = PreviousLoc;
7483+
SourceRange BodyRange(Tok.getLoc());
7484+
auto setCodeCompletionDelayedDeclStateIfNeeded = [&] {
7485+
if (!isCodeCompletionFirstPass() ||
7486+
!SourceMgr.rangeContainsCodeCompletionLoc(BodyRange)) {
7487+
return;
7488+
}
7489+
if (State->hasCodeCompletionDelayedDeclState())
7490+
State->takeCodeCompletionDelayedDeclState();
7491+
State->setCodeCompletionDelayedDeclState(
7492+
SourceMgr, L->getBufferID(),
7493+
CodeCompletionDelayedDeclKind::FunctionBody,
7494+
PD_Default, AFD, BodyRange, BodyPreviousLoc);
7495+
};
7496+
7497+
bool HasNestedTypeDeclarations;
7498+
if (canDelayFunctionBodyParsing(HasNestedTypeDeclarations)) {
7499+
BodyRange.End = PreviousLoc;
7500+
7501+
assert(SourceMgr.isBeforeInBuffer(BodyRange.Start, BodyRange.End) ||
7502+
BodyRange.Start == BodyRange.End &&
7503+
"At least '{' should be consumed");
7504+
7505+
AFD->setBodyDelayed(BodyRange);
7506+
AFD->setHasNestedTypeDeclarations(HasNestedTypeDeclarations);
7507+
7508+
setCodeCompletionDelayedDeclStateIfNeeded();
74867509
return;
74877510
}
74887511

74897512
(void)parseAbstractFunctionBodyImpl(AFD);
7513+
assert(BodyRange.Start == AFD->getBodySourceRange().Start &&
7514+
"The start of the body should be the 'l_brace' token above");
7515+
BodyRange = AFD->getBodySourceRange();
7516+
setCodeCompletionDelayedDeclStateIfNeeded();
74907517
}
74917518

74927519
BodyAndFingerprint

lib/Parse/ParseExpr.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -887,6 +887,8 @@ void Parser::tryLexRegexLiteral(bool forUnappliedOperator) {
887887

888888
// Check to see if we have a regex literal `/.../`, optionally with a prefix
889889
// operator e.g `!/.../`.
890+
// NOTE: If you change this logic you must also change the logic in
891+
// isPotentialUnskippableBareSlashRegexLiteral.
890892
bool mustBeRegex = false;
891893
switch (Tok.getKind()) {
892894
case tok::oper_prefix:
Lines changed: 62 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,62 @@
1+
// RUN: %empty-directory(%t)
2+
3+
// RUN: %target-swift-frontend -parse -enable-bare-slash-regex -disable-availability-checking -experimental-skip-all-function-bodies -stats-output-dir %t %s
4+
// RUN: %{python} %utils/process-stats-dir.py --set-csv-baseline %t/stats.csv %t
5+
// RUN: %FileCheck -input-file %t/stats.csv %s
6+
7+
// REQUIRES: swift_in_compiler
8+
9+
// Make sure we can skip in all of the below cases.
10+
11+
// We don't appear to output a stats entry when it is 0.
12+
// CHECK-NOT: {{"Parse.NumFunctionsParsed"}}
13+
14+
// Balanced `{}`, so okay.
15+
func a() { / {}/ }
16+
func b() { / \{}/ }
17+
func c() { / {"{"}/ }
18+
19+
// Some cases of infix '/' that we should continue to skip.
20+
func d() {
21+
_ = 1 / 2 + 3 * 4
22+
_ = 1 / 2 / 3 / 4
23+
}
24+
func e() {
25+
let arr = [1, 2, 3]
26+
_ = arr.reduce(0, /) / 2
27+
28+
func foo(_ i: Int, _ fn: () -> Void) {}
29+
foo(1 / 2 / 3, { print("}}}{{{") })
30+
}
31+
32+
// Some cases of prefix '/' that we should continue to skip.
33+
prefix operator /
34+
prefix func / <T> (_ x: T) -> T { x }
35+
36+
enum E {
37+
case e
38+
func foo<T>(_ x: T) {}
39+
}
40+
41+
func f() {
42+
_ = /E.e
43+
(/E.e).foo(/0)
44+
45+
func foo<T, U>(_ x: T, _ y: U) {}
46+
foo((/E.e), /E.e)
47+
foo((/)(E.e), /E.e)
48+
49+
func bar<T>(_ x: T) -> Int { 0 }
50+
_ = bar(/E.e) / 2
51+
}
52+
53+
postfix operator /
54+
prefix func / <T> (_ x: T) -> T { x }
55+
56+
// Some cases of postfix '/' that we should continue to skip.
57+
func g() {
58+
_ = 0/
59+
_ = 0/ / 1/
60+
_ = 1/ + 1/
61+
_ = 1 + 2/
62+
}

0 commit comments

Comments
 (0)