Skip to content

Commit 6911553

Browse files
committed
[Lexer] Push trivia lexing down to the parser
This is an intermediate state in which the lexer delegates the responsibility for trivia lexing to the parser. Later, the parser will delegate this responsibility to SyntaxParsingContext, which will hand it over to SyntaxParseAction, which will lex the trivia pieces only if it is really necessary to do so.
1 parent 2bf5e4e commit 6911553

File tree

7 files changed

+71
-58
lines changed

7 files changed

+71
-58
lines changed

include/swift/Parse/Lexer.h

Lines changed: 16 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -122,16 +122,13 @@ class Lexer {
122122

123123
/// The current leading trivia for the next token.
124124
///
125-
/// This is only preserved if this Lexer was constructed with
126-
/// `TriviaRetentionMode::WithTrivia`.
127-
ParsedTrivia LeadingTrivia;
125+
/// The StringRef points into the source buffer that is currently being lexed.
126+
StringRef LeadingTrivia;
128127

129128
/// The current trailing trivia for the next token.
130-
///
131-
/// This is only preserved if this Lexer was constructed with
132-
/// `TriviaRetentionMode::WithTrivia`.
133-
ParsedTrivia TrailingTrivia;
134-
129+
/// The StringRef points into the source buffer that is currently being lexed.
130+
StringRef TrailingTrivia;
131+
135132
Lexer(const Lexer&) = delete;
136133
void operator=(const Lexer&) = delete;
137134

@@ -196,19 +193,19 @@ class Lexer {
196193

197194
/// Lex a token. If \c TriviaRetention is \c TriviaRetentionMode::WithTrivia,
198195
/// the passed trivia references are populated.
199-
void lex(Token &Result, ParsedTrivia &LeadingTriviaResult,
200-
ParsedTrivia &TrailingTriviaResult) {
196+
void lex(Token &Result, StringRef &LeadingTriviaResult,
197+
StringRef &TrailingTriviaResult) {
201198
Result = NextToken;
202199
if (TriviaRetention == TriviaRetentionMode::WithTrivia) {
203-
LeadingTriviaResult = {LeadingTrivia};
204-
TrailingTriviaResult = {TrailingTrivia};
200+
LeadingTriviaResult = LeadingTrivia;
201+
TrailingTriviaResult = TrailingTrivia;
205202
}
206203
if (Result.isNot(tok::eof))
207204
lexImpl();
208205
}
209206

210207
void lex(Token &Result) {
211-
ParsedTrivia LeadingTrivia, TrailingTrivia;
208+
StringRef LeadingTrivia, TrailingTrivia;
212209
lex(Result, LeadingTrivia, TrailingTrivia);
213210
}
214211

@@ -240,7 +237,7 @@ class Lexer {
240237
/// After restoring the state, lexer will return this token and continue from
241238
/// there.
242239
State getStateForBeginningOfToken(const Token &Tok,
243-
const ParsedTrivia &LeadingTrivia = {}) const {
240+
const StringRef &LeadingTrivia = {}) const {
244241

245242
// If the token has a comment attached to it, rewind to before the comment,
246243
// not just the start of the token. This ensures that we will re-lex and
@@ -249,8 +246,11 @@ class Lexer {
249246
if (TokStart.isInvalid())
250247
TokStart = Tok.getLoc();
251248
auto S = getStateForBeginningOfTokenLoc(TokStart);
252-
if (TriviaRetention == TriviaRetentionMode::WithTrivia)
249+
if (TriviaRetention == TriviaRetentionMode::WithTrivia) {
253250
S.LeadingTrivia = LeadingTrivia;
251+
} else {
252+
S.LeadingTrivia = StringRef();
253+
}
254254
return S;
255255
}
256256

@@ -275,8 +275,7 @@ class Lexer {
275275

276276
// Restore Trivia.
277277
if (TriviaRetention == TriviaRetentionMode::WithTrivia)
278-
if (auto &LTrivia = S.LeadingTrivia)
279-
LeadingTrivia = std::move(*LTrivia);
278+
LeadingTrivia = S.LeadingTrivia;
280279
}
281280

282281
/// Restore the lexer state to a given state that is located before
@@ -550,7 +549,6 @@ class Lexer {
550549
void lexOperatorIdentifier();
551550
void lexHexNumber();
552551
void lexNumber();
553-
void lexTrivia(ParsedTrivia &T, bool IsForTrailingTrivia);
554552
StringRef lexTrivia(bool IsForTrailingTrivia);
555553
static unsigned lexUnicodeEscape(const char *&CurPtr, Lexer *Diags);
556554

include/swift/Parse/LexerState.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,7 @@ class LexerState {
3939
private:
4040
explicit LexerState(SourceLoc Loc) : Loc(Loc) {}
4141
SourceLoc Loc;
42-
llvm::Optional<ParsedTrivia> LeadingTrivia;
42+
StringRef LeadingTrivia;
4343
friend class Lexer;
4444
};
4545

include/swift/Parse/Parser.h

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -207,13 +207,13 @@ class Parser {
207207
/// This is the current token being considered by the parser.
208208
Token Tok;
209209

210-
/// leading trivias for \c Tok.
210+
/// Leading trivia for \c Tok.
211211
/// Always empty if !SF.shouldBuildSyntaxTree().
212-
ParsedTrivia LeadingTrivia;
212+
StringRef LeadingTrivia;
213213

214-
/// trailing trivias for \c Tok.
214+
/// Trailing trivia for \c Tok.
215215
/// Always empty if !SF.shouldBuildSyntaxTree().
216-
ParsedTrivia TrailingTrivia;
216+
StringRef TrailingTrivia;
217217

218218
/// The receiver to collect all consumed tokens.
219219
ConsumeTokenReceiver *TokReceiver;
@@ -549,7 +549,7 @@ class Parser {
549549
}
550550

551551
SourceLoc leadingTriviaLoc() {
552-
return Tok.getLoc().getAdvancedLoc(-LeadingTrivia.getLength());
552+
return Tok.getLoc().getAdvancedLoc(-LeadingTrivia.size());
553553
}
554554

555555
SourceLoc consumeIdentifier(Identifier &Result, bool diagnoseDollarPrefix) {

lib/Parse/Lexer.cpp

Lines changed: 12 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -283,21 +283,23 @@ void Lexer::formToken(tok Kind, const char *TokStart) {
283283
}
284284
unsigned CommentLength = 0;
285285
if (RetainComments == CommentRetentionMode::AttachToNextToken) {
286+
auto LeadingTriviaPieces = TriviaLexer::lexTrivia(LeadingTrivia);
286287
// 'CommentLength' here is the length from the *first* comment to the
287288
// token text (or its backtick if exist).
288-
auto Iter = llvm::find_if(LeadingTrivia, [](const ParsedTriviaPiece &Piece) {
289+
auto Iter = llvm::find_if(LeadingTriviaPieces, [](const ParsedTriviaPiece &Piece) {
289290
return isCommentTriviaKind(Piece.getKind());
290291
});
291-
for (auto End = LeadingTrivia.end(); Iter != End; Iter++) {
292+
for (auto End = LeadingTriviaPieces.end(); Iter != End; Iter++) {
292293
CommentLength += Iter->getLength();
293294
}
294295
}
295296

296297
StringRef TokenText { TokStart, static_cast<size_t>(CurPtr - TokStart) };
297298

298299
if (TriviaRetention == TriviaRetentionMode::WithTrivia && Kind != tok::eof) {
299-
assert(TrailingTrivia.empty() && "TrailingTrivia is empty here");
300-
lexTrivia(TrailingTrivia, /* IsForTrailingTrivia */ true);
300+
TrailingTrivia = lexTrivia(/*IsForTrailingTrivia=*/true);
301+
} else {
302+
TrailingTrivia = StringRef();
301303
}
302304

303305
NextToken.setToken(Kind, TokenText, CommentLength);
@@ -2336,24 +2338,23 @@ void Lexer::lexImpl() {
23362338
assert(CurPtr >= BufferStart &&
23372339
CurPtr <= BufferEnd && "Current pointer out of range!");
23382340

2339-
LeadingTrivia.clear();
2340-
TrailingTrivia.clear();
2341-
2341+
const char *LeadingTriviaStart = CurPtr;
23422342
if (CurPtr == BufferStart) {
23432343
if (BufferStart < ContentStart) {
23442344
size_t BOMLen = ContentStart - BufferStart;
23452345
assert(BOMLen == 3 && "UTF-8 BOM is 3 bytes");
2346-
// Add UTF-8 BOM to LeadingTrivia.
2347-
LeadingTrivia.push_back(TriviaKind::GarbageText, BOMLen);
23482346
CurPtr += BOMLen;
23492347
}
23502348
NextToken.setAtStartOfLine(true);
23512349
} else {
23522350
NextToken.setAtStartOfLine(false);
23532351
}
23542352

2355-
lexTrivia(LeadingTrivia, /* IsForTrailingTrivia */ false);
2356-
2353+
// Advance CurPtr past the leading trivia of the next token and form the
2354+
// leading trivia string, which also covers the UTF-8 BOM if one was skipped.
2355+
lexTrivia(/*IsForTrailingTrivia=*/false);
2356+
LeadingTrivia = StringRef(LeadingTriviaStart, CurPtr - LeadingTriviaStart);
2357+
23572358
// Remember the start of the token so we can form the text range.
23582359
const char *TokStart = CurPtr;
23592360

@@ -2530,13 +2531,6 @@ Token Lexer::getTokenAtLocation(const SourceManager &SM, SourceLoc Loc,
25302531
return L.peekNextToken();
25312532
}
25322533

2533-
void Lexer::lexTrivia(ParsedTrivia &Pieces, bool IsForTrailingTrivia) {
2534-
auto TriviaString = lexTrivia(IsForTrailingTrivia);
2535-
auto ParsedPieces = TriviaLexer::lexTrivia(TriviaString);
2536-
Pieces.Pieces.insert(Pieces.Pieces.end(), ParsedPieces.Pieces.begin(),
2537-
ParsedPieces.Pieces.end());
2538-
}
2539-
25402534
StringRef Lexer::lexTrivia(bool IsForTrailingTrivia) {
25412535
const char *AllTriviaStart = CurPtr;
25422536

lib/Parse/ParseDecl.cpp

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -210,7 +210,9 @@ void Parser::parseTopLevel(SmallVectorImpl<Decl *> &decls) {
210210
}
211211

212212
// Finalize the syntax context.
213-
SyntaxContext->addToken(Tok, LeadingTrivia, TrailingTrivia);
213+
auto LeadingTriviaPieces = TriviaLexer::lexTrivia(LeadingTrivia);
214+
auto TrailingTriviaPieces = TriviaLexer::lexTrivia(TrailingTrivia);
215+
SyntaxContext->addToken(Tok, LeadingTriviaPieces, TrailingTriviaPieces);
214216
}
215217

216218
bool Parser::parseTopLevelSIL() {

lib/Parse/ParseExpr.cpp

Lines changed: 24 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1865,7 +1865,7 @@ parseStringSegments(SmallVectorImpl<Lexer::StringSegment> &Segments,
18651865
Tok.setKind(tok::string_interpolation_anchor);
18661866
// We don't allow trailing trivia for this anchor, because the
18671867
// trivia is a part of the next string segment.
1868-
TrailingTrivia.clear();
1868+
TrailingTrivia = StringRef();
18691869
consumeToken();
18701870
}
18711871
break;
@@ -1910,17 +1910,19 @@ ParserResult<Expr> Parser::parseExprStringLiteral() {
19101910
Token OpenQuote(QuoteKind, OpenQuoteStr);
19111911
Token CloseQuote(QuoteKind, CloseQuoteStr);
19121912
ParsedTrivia EmptyTrivia;
1913-
ParsedTrivia EntireTrailingTrivia = TrailingTrivia;
1913+
StringRef EntireTrailingTrivia = TrailingTrivia;
19141914

19151915
if (HasCustomDelimiter) {
1916+
auto LeadingTriviaPieces = TriviaLexer::lexTrivia(LeadingTrivia);
19161917
Token OpenDelimiter(tok::raw_string_delimiter, OpenDelimiterStr);
19171918
// When a custom delimiter is present, it owns the leading trivia.
1918-
SyntaxContext->addToken(OpenDelimiter, LeadingTrivia, EmptyTrivia);
1919+
SyntaxContext->addToken(OpenDelimiter, LeadingTriviaPieces, EmptyTrivia);
19191920

19201921
SyntaxContext->addToken(OpenQuote, EmptyTrivia, EmptyTrivia);
19211922
} else {
1923+
auto LeadingTriviaPieces = TriviaLexer::lexTrivia(LeadingTrivia);
19221924
// Without custom delimiter the quote owns leading trivia.
1923-
SyntaxContext->addToken(OpenQuote, LeadingTrivia, EmptyTrivia);
1925+
SyntaxContext->addToken(OpenQuote, LeadingTriviaPieces, EmptyTrivia);
19241926
}
19251927

19261928
// The simple case: just a single literal segment.
@@ -1949,10 +1951,16 @@ ParserResult<Expr> Parser::parseExprStringLiteral() {
19491951

19501952
Token CloseDelimiter(tok::raw_string_delimiter, CloseDelimiterStr);
19511953
// When a custom delimiter is present it owns the trailing trivia.
1952-
SyntaxContext->addToken(CloseDelimiter, EmptyTrivia, EntireTrailingTrivia);
1954+
auto EntireTrailingTriviaPieces =
1955+
TriviaLexer::lexTrivia(EntireTrailingTrivia);
1956+
SyntaxContext->addToken(CloseDelimiter, EmptyTrivia,
1957+
EntireTrailingTriviaPieces);
19531958
} else {
19541959
// Without custom delimiter the quote owns trailing trivia.
1955-
SyntaxContext->addToken(CloseQuote, EmptyTrivia, EntireTrailingTrivia);
1960+
auto EntireTrailingTriviaPieces =
1961+
TriviaLexer::lexTrivia(EntireTrailingTrivia);
1962+
SyntaxContext->addToken(CloseQuote, EmptyTrivia,
1963+
EntireTrailingTriviaPieces);
19561964
}
19571965

19581966
return makeParserResult(
@@ -1965,8 +1973,8 @@ ParserResult<Expr> Parser::parseExprStringLiteral() {
19651973
// We are going to mess with Tok to do reparsing for interpolated literals,
19661974
// don't lose our 'next' token.
19671975
llvm::SaveAndRestore<Token> SavedTok(Tok);
1968-
llvm::SaveAndRestore<ParsedTrivia> SavedLeadingTrivia(LeadingTrivia);
1969-
llvm::SaveAndRestore<ParsedTrivia> SavedTrailingTrivia(TrailingTrivia);
1976+
llvm::SaveAndRestore<StringRef> SavedLeadingTrivia(LeadingTrivia);
1977+
llvm::SaveAndRestore<StringRef> SavedTrailingTrivia(TrailingTrivia);
19701978
// For errors, we need the real PreviousLoc, i.e. the start of the
19711979
// whole InterpolatedStringLiteral.
19721980
llvm::SaveAndRestore<SourceLoc> SavedPreviousLoc(PreviousLoc);
@@ -2015,10 +2023,16 @@ ParserResult<Expr> Parser::parseExprStringLiteral() {
20152023

20162024
Token CloseDelimiter(tok::raw_string_delimiter, CloseDelimiterStr);
20172025
// When a custom delimiter is present it owns the trailing trivia.
2018-
SyntaxContext->addToken(CloseDelimiter, EmptyTrivia, EntireTrailingTrivia);
2026+
auto EntireTrailingTriviaPieces =
2027+
TriviaLexer::lexTrivia(EntireTrailingTrivia);
2028+
SyntaxContext->addToken(CloseDelimiter, EmptyTrivia,
2029+
EntireTrailingTriviaPieces);
20192030
} else {
20202031
// Without custom delimiter the quote owns trailing trivia.
2021-
SyntaxContext->addToken(CloseQuote, EmptyTrivia, EntireTrailingTrivia);
2032+
auto EntireTrailingTriviaPieces =
2033+
TriviaLexer::lexTrivia(EntireTrailingTrivia);
2034+
SyntaxContext->addToken(CloseQuote, EmptyTrivia,
2035+
EntireTrailingTriviaPieces);
20222036
}
20232037

20242038
if (AppendingExpr->getBody()->getNumElements() == 1) {

lib/Parse/Parser.cpp

Lines changed: 10 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -74,7 +74,7 @@ void tokenize(const LangOptions &LangOpts, const SourceManager &SM,
7474
}
7575

7676
Token Tok;
77-
ParsedTrivia LeadingTrivia, TrailingTrivia;
77+
StringRef LeadingTrivia, TrailingTrivia;
7878
do {
7979
L.lex(Tok, LeadingTrivia, TrailingTrivia);
8080

@@ -99,7 +99,9 @@ void tokenize(const LangOptions &LangOpts, const SourceManager &SM,
9999
DestFunc(StrTok, ParsedTrivia(), ParsedTrivia());
100100
}
101101
} else {
102-
DestFunc(Tok, LeadingTrivia, TrailingTrivia);
102+
auto LeadingTriviaPieces = TriviaLexer::lexTrivia(LeadingTrivia);
103+
auto TrailingTriviaPieces = TriviaLexer::lexTrivia(TrailingTrivia);
104+
DestFunc(Tok, LeadingTriviaPieces, TrailingTriviaPieces);
103105
}
104106

105107
} while (Tok.getKind() != tok::eof);
@@ -604,7 +606,9 @@ void Parser::consumeExtraToken(Token Extra) {
604606

605607
SourceLoc Parser::consumeToken() {
606608
TokReceiver->receive(Tok);
607-
SyntaxContext->addToken(Tok, LeadingTrivia, TrailingTrivia);
609+
auto LeadingTriviaPieces = TriviaLexer::lexTrivia(LeadingTrivia);
610+
auto TrailingTriviaPieces = TriviaLexer::lexTrivia(TrailingTrivia);
611+
SyntaxContext->addToken(Tok, LeadingTriviaPieces, TrailingTriviaPieces);
608612
return consumeTokenWithoutFeedingReceiver();
609613
}
610614

@@ -640,7 +644,8 @@ void Parser::markSplitToken(tok Kind, StringRef Txt) {
640644
SplitTokens.emplace_back();
641645
SplitTokens.back().setToken(Kind, Txt);
642646
ParsedTrivia EmptyTrivia;
643-
SyntaxContext->addToken(SplitTokens.back(), LeadingTrivia, EmptyTrivia);
647+
auto LeadingTriviaPieces = TriviaLexer::lexTrivia(LeadingTrivia);
648+
SyntaxContext->addToken(SplitTokens.back(), LeadingTriviaPieces, EmptyTrivia);
644649
TokReceiver->receive(SplitTokens.back());
645650
}
646651

@@ -839,7 +844,7 @@ bool Parser::loadCurrentSyntaxNodeFromCache() {
839844
}
840845
unsigned LexerOffset =
841846
SourceMgr.getLocOffsetInBuffer(Tok.getLoc(), L->getBufferID());
842-
unsigned LeadingTriviaLen = LeadingTrivia.getLength();
847+
unsigned LeadingTriviaLen = LeadingTrivia.size();
843848
unsigned LeadingTriviaOffset = LexerOffset - LeadingTriviaLen;
844849
SourceLoc LeadingTriviaLoc = Tok.getLoc().getAdvancedLoc(-LeadingTriviaLen);
845850
if (auto TextLength = SyntaxContext->lookupNode(LeadingTriviaOffset,

0 commit comments

Comments
 (0)