Skip to content

Commit 080d59b

Browse files
committed
[Lexer] Delay token diagnostics
Queue up diagnostics when lexing, and wait until `Lexer::lex` is called before emitting them. This allows the parser to backtrack and re-lex a token without emitting stale diagnostics that were recorded when the token was previously lexed as invalid.
1 parent 07b3c76 commit 080d59b

File tree

3 files changed

+58
-24
lines changed

3 files changed

+58
-24
lines changed

include/swift/Parse/Lexer.h

Lines changed: 26 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -75,7 +75,10 @@ class Lexer {
7575
const LangOptions &LangOpts;
7676
const SourceManager &SourceMgr;
7777
const unsigned BufferID;
78-
DiagnosticEngine *Diags;
78+
79+
/// A queue of diagnostics to emit when a token is consumed. We want to queue
80+
/// them, as the parser may backtrack and re-lex a token.
81+
Optional<DiagnosticQueue> DiagQueue;
7982

8083
using State = LexerState;
8184

@@ -154,6 +157,19 @@ class Lexer {
154157

155158
void initialize(unsigned Offset, unsigned EndOffset);
156159

160+
/// Retrieve the diagnostic engine for emitting diagnostics for the current
161+
/// token.
162+
DiagnosticEngine *getTokenDiags() {
163+
return DiagQueue ? &DiagQueue->getDiags() : nullptr;
164+
}
165+
166+
/// Retrieve the underlying diagnostic engine we emit diagnostics to. Note
167+
/// this should only be used for diagnostics not concerned with the current
168+
/// token.
169+
DiagnosticEngine *getUnderlyingDiags() {
170+
return DiagQueue ? &DiagQueue->getUnderlyingDiags() : nullptr;
171+
}
172+
157173
public:
158174
/// Create a normal lexer that scans the whole source buffer.
159175
///
@@ -209,6 +225,10 @@ class Lexer {
209225
LeadingTriviaResult = LeadingTrivia;
210226
TrailingTriviaResult = TrailingTrivia;
211227
}
228+
// Emit any diagnostics recorded for this token.
229+
if (DiagQueue)
230+
DiagQueue->emit();
231+
212232
if (Result.isNot(tok::eof))
213233
lexImpl();
214234
}
@@ -298,12 +318,12 @@ class Lexer {
298318
void restoreState(State S, bool enableDiagnostics = false) {
299319
assert(S.isValid());
300320
CurPtr = getBufferPtrForSourceLoc(S.Loc);
301-
// Don't reemit diagnostics while readvancing the lexer.
302-
llvm::SaveAndRestore<DiagnosticEngine*>
303-
D(Diags, enableDiagnostics ? Diags : nullptr);
304-
305321
lexImpl();
306322

323+
// Don't re-emit diagnostics from readvancing the lexer.
324+
if (DiagQueue && !enableDiagnostics)
325+
DiagQueue->clear();
326+
307327
// Restore Trivia.
308328
if (TriviaRetention == TriviaRetentionMode::WithTrivia)
309329
LeadingTrivia = S.LeadingTrivia;
@@ -505,7 +525,7 @@ class Lexer {
505525

506526
void getStringLiteralSegments(const Token &Str,
507527
SmallVectorImpl<StringSegment> &Segments) {
508-
return getStringLiteralSegments(Str, Segments, Diags);
528+
return getStringLiteralSegments(Str, Segments, getTokenDiags());
509529
}
510530

511531
static SourceLoc getSourceLoc(const char *Loc) {

lib/Parse/Lexer.cpp

Lines changed: 29 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -183,9 +183,12 @@ Lexer::Lexer(const PrincipalTag &, const LangOptions &LangOpts,
183183
HashbangMode HashbangAllowed, CommentRetentionMode RetainComments,
184184
TriviaRetentionMode TriviaRetention)
185185
: LangOpts(LangOpts), SourceMgr(SourceMgr), BufferID(BufferID),
186-
Diags(Diags), LexMode(LexMode),
186+
LexMode(LexMode),
187187
IsHashbangAllowed(HashbangAllowed == HashbangMode::Allowed),
188-
RetainComments(RetainComments), TriviaRetention(TriviaRetention) {}
188+
RetainComments(RetainComments), TriviaRetention(TriviaRetention) {
189+
if (Diags)
190+
DiagQueue.emplace(*Diags, /*emitOnDestruction*/ false);
191+
}
189192

190193
void Lexer::initialize(unsigned Offset, unsigned EndOffset) {
191194
assert(Offset <= EndOffset);
@@ -245,7 +248,7 @@ Lexer::Lexer(const LangOptions &Options, const SourceManager &SourceMgr,
245248

246249
Lexer::Lexer(Lexer &Parent, State BeginState, State EndState)
247250
: Lexer(PrincipalTag(), Parent.LangOpts, Parent.SourceMgr, Parent.BufferID,
248-
Parent.Diags, Parent.LexMode,
251+
Parent.getUnderlyingDiags(), Parent.LexMode,
249252
Parent.IsHashbangAllowed
250253
? HashbangMode::Allowed
251254
: HashbangMode::Disallowed,
@@ -261,7 +264,7 @@ Lexer::Lexer(Lexer &Parent, State BeginState, State EndState)
261264
}
262265

263266
InFlightDiagnostic Lexer::diagnose(const char *Loc, Diagnostic Diag) {
264-
if (Diags)
267+
if (auto *Diags = getTokenDiags())
265268
return Diags->diagnose(getSourceLoc(Loc), Diag);
266269

267270
return InFlightDiagnostic();
@@ -272,7 +275,7 @@ Token Lexer::getTokenAt(SourceLoc Loc) {
272275
SourceMgr.findBufferContainingLoc(Loc)) &&
273276
"location from the wrong buffer");
274277

275-
Lexer L(LangOpts, SourceMgr, BufferID, Diags, LexMode,
278+
Lexer L(LangOpts, SourceMgr, BufferID, getUnderlyingDiags(), LexMode,
276279
HashbangMode::Allowed, CommentRetentionMode::None,
277280
TriviaRetentionMode::WithoutTrivia);
278281
L.restoreState(State(Loc));
@@ -330,6 +333,7 @@ void Lexer::formStringLiteralToken(const char *TokStart,
330333
return;
331334
NextToken.setStringLiteral(IsMultilineString, CustomDelimiterLen);
332335

336+
auto *Diags = getTokenDiags();
333337
if (IsMultilineString && Diags)
334338
validateMultilineIndents(NextToken, Diags);
335339
}
@@ -416,7 +420,8 @@ static bool advanceToEndOfLine(const char *&CurPtr, const char *BufferEnd,
416420
}
417421

418422
void Lexer::skipToEndOfLine(bool EatNewline) {
419-
bool isEOL = advanceToEndOfLine(CurPtr, BufferEnd, CodeCompletionPtr, Diags);
423+
bool isEOL =
424+
advanceToEndOfLine(CurPtr, BufferEnd, CodeCompletionPtr, getTokenDiags());
420425
if (EatNewline && isEOL) {
421426
++CurPtr;
422427
NextToken.setAtStartOfLine(true);
@@ -514,8 +519,8 @@ static bool skipToEndOfSlashStarComment(const char *&CurPtr,
514519
/// skipSlashStarComment - /**/ comments are skipped (treated as whitespace).
515520
/// Note that (unlike in C) block comments can be nested.
516521
void Lexer::skipSlashStarComment() {
517-
bool isMultiline =
518-
skipToEndOfSlashStarComment(CurPtr, BufferEnd, CodeCompletionPtr, Diags);
522+
bool isMultiline = skipToEndOfSlashStarComment(
523+
CurPtr, BufferEnd, CodeCompletionPtr, getTokenDiags());
519524
if (isMultiline)
520525
NextToken.setAtStartOfLine(true);
521526
}
@@ -1360,7 +1365,7 @@ unsigned Lexer::lexCharacter(const char *&CurPtr, char StopQuote,
13601365
if (!IsMultilineString && !CustomDelimiterLen)
13611366
return ~0U;
13621367

1363-
DiagnosticEngine *D = EmitDiagnostics ? Diags : nullptr;
1368+
DiagnosticEngine *D = EmitDiagnostics ? getTokenDiags() : nullptr;
13641369
auto TmpPtr = CurPtr;
13651370
if (IsMultilineString &&
13661371
!advanceIfMultilineDelimiter(CustomDelimiterLen, TmpPtr, D))
@@ -1385,7 +1390,7 @@ unsigned Lexer::lexCharacter(const char *&CurPtr, char StopQuote,
13851390
return CurPtr[-1];
13861391
case '\\': // Escapes.
13871392
if (!delimiterMatches(CustomDelimiterLen, CurPtr,
1388-
EmitDiagnostics ? Diags : nullptr))
1393+
EmitDiagnostics ? getTokenDiags() : nullptr))
13891394
return '\\';
13901395
break;
13911396
}
@@ -1799,7 +1804,7 @@ static void validateMultilineIndents(const Token &Str,
17991804
void Lexer::diagnoseSingleQuoteStringLiteral(const char *TokStart,
18001805
const char *TokEnd) {
18011806
assert(*TokStart == '\'' && TokEnd[-1] == '\'');
1802-
if (!Diags) // or assert?
1807+
if (!getTokenDiags()) // or assert?
18031808
return;
18041809

18051810
auto startLoc = Lexer::getSourceLoc(TokStart);
@@ -1836,7 +1841,7 @@ void Lexer::diagnoseSingleQuoteStringLiteral(const char *TokStart,
18361841
replacement.append(OutputPtr, Ptr - 1);
18371842
replacement.push_back('"');
18381843

1839-
Diags->diagnose(startLoc, diag::lex_single_quote_string)
1844+
getTokenDiags()->diagnose(startLoc, diag::lex_single_quote_string)
18401845
.fixItReplaceChars(startLoc, endLoc, replacement);
18411846
}
18421847

@@ -1852,8 +1857,8 @@ void Lexer::lexStringLiteral(unsigned CustomDelimiterLen) {
18521857
// diagnostics about changing them to double quotes.
18531858
assert((QuoteChar == '"' || QuoteChar == '\'') && "Unexpected start");
18541859

1855-
bool IsMultilineString = advanceIfMultilineDelimiter(CustomDelimiterLen,
1856-
CurPtr, Diags, true);
1860+
bool IsMultilineString = advanceIfMultilineDelimiter(
1861+
CustomDelimiterLen, CurPtr, getTokenDiags(), true);
18571862
if (IsMultilineString && *CurPtr != '\n' && *CurPtr != '\r')
18581863
diagnose(CurPtr, diag::lex_illegal_multiline_string_start)
18591864
.fixItInsert(Lexer::getSourceLoc(CurPtr), "\n");
@@ -2380,6 +2385,11 @@ void Lexer::lexImpl() {
23802385
assert(CurPtr >= BufferStart &&
23812386
CurPtr <= BufferEnd && "Current pointer out of range!");
23822387

2388+
// If we're re-lexing, clear out any previous diagnostics that weren't
2389+
// emitted.
2390+
if (DiagQueue)
2391+
DiagQueue->clear();
2392+
23832393
const char *LeadingTriviaStart = CurPtr;
23842394
if (CurPtr == BufferStart) {
23852395
if (BufferStart < ContentStart) {
@@ -2467,8 +2477,9 @@ void Lexer::lexImpl() {
24672477
case ':': return formToken(tok::colon, TokStart);
24682478
case '\\': return formToken(tok::backslash, TokStart);
24692479

2470-
case '#':
2480+
case '#': {
24712481
// Try lex a raw string literal.
2482+
auto *Diags = getTokenDiags();
24722483
if (unsigned CustomDelimiterLen = advanceIfCustomDelimiter(CurPtr, Diags))
24732484
return lexStringLiteral(CustomDelimiterLen);
24742485

@@ -2479,8 +2490,8 @@ void Lexer::lexImpl() {
24792490

24802491
// Otherwise try lex a magic pound literal.
24812492
return lexHash();
2482-
2483-
// Operator characters.
2493+
}
2494+
// Operator characters.
24842495
case '/':
24852496
if (CurPtr[0] == '/') { // "//"
24862497
skipSlashSlashComment(/*EatNewline=*/true);
@@ -2656,7 +2667,7 @@ StringRef Lexer::lexTrivia(bool IsForTrailingTrivia,
26562667
case 0:
26572668
switch (getNulCharacterKind(CurPtr - 1)) {
26582669
case NulCharacterKind::Embedded: {
2659-
diagnoseEmbeddedNul(Diags, CurPtr - 1);
2670+
diagnoseEmbeddedNul(getTokenDiags(), CurPtr - 1);
26602671
goto Restart;
26612672
}
26622673
case NulCharacterKind::CodeCompletion:

unittests/Parse/LexerTests.cpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -804,6 +804,9 @@ TEST_F(LexerTest, DiagnoseEmbeddedNul) {
804804
LexerMode::Swift, HashbangMode::Disallowed,
805805
CommentRetentionMode::None, TriviaRetentionMode::WithTrivia);
806806

807+
Token Tok;
808+
L.lex(Tok);
809+
807810
ASSERT_TRUE(containsPrefix(DiagConsumer.messages,
808811
"1, 2: nul character embedded in middle of file"));
809812
ASSERT_TRUE(containsPrefix(DiagConsumer.messages,

0 commit comments

Comments
 (0)