Skip to content

Commit 2bf5e4e

Browse files
committed
[Lexer] Extract trivia piece lexing to a separate TriviaLexer
The lexer is only responsible for skipping over trivia and noting their length. A separate TriviaLexer can be invoked to split the raw trivia string into its pieces. Since most of the time the trivia pieces aren't needed, this will allow us to later only parse trivia into pieces when they are explicitly needed.
1 parent 2cfa57d commit 2bf5e4e

File tree

2 files changed

+174
-30
lines changed

2 files changed

+174
-30
lines changed

include/swift/Parse/Lexer.h

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -551,6 +551,7 @@ class Lexer {
551551
void lexHexNumber();
552552
void lexNumber();
553553
void lexTrivia(ParsedTrivia &T, bool IsForTrailingTrivia);
554+
StringRef lexTrivia(bool IsForTrailingTrivia);
554555
static unsigned lexUnicodeEscape(const char *&CurPtr, Lexer *Diags);
555556

556557
unsigned lexCharacter(const char *&CurPtr, char StopQuote,
@@ -572,7 +573,14 @@ class Lexer {
572573

573574
NulCharacterKind getNulCharacterKind(const char *Ptr) const;
574575
};
575-
576+
577+
/// A lexer that can lex trivia into its pieces
578+
class TriviaLexer {
579+
public:
580+
/// Decompose the trivia in \p TriviaStr into its pieces.
581+
static ParsedTrivia lexTrivia(StringRef TriviaStr);
582+
};
583+
576584
/// Given an ordered token \param Array , get the iterator pointing to the first
577585
/// token that is not before \param Loc .
578586
template<typename ArrayTy, typename Iterator = typename ArrayTy::iterator>

lib/Parse/Lexer.cpp

Lines changed: 165 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -2531,6 +2531,15 @@ Token Lexer::getTokenAtLocation(const SourceManager &SM, SourceLoc Loc,
25312531
}
25322532

25332533
/// Lex the trivia at the current position and append its pieces to \p Pieces.
///
/// The raw trivia text is obtained from the StringRef-returning overload of
/// lexTrivia and is then split into pieces by TriviaLexer::lexTrivia. The
/// resulting pieces are appended after whatever \p Pieces already contains.
///
/// \param Pieces receives the lexed trivia pieces.
/// \param IsForTrailingTrivia forwarded unchanged to the raw trivia lexer.
void Lexer::lexTrivia(ParsedTrivia &Pieces, bool IsForTrailingTrivia) {
  // Step 1: skip over the trivia and remember the text that was skipped.
  auto RawTrivia = lexTrivia(IsForTrailingTrivia);

  // Step 2: split the skipped text into individual trivia pieces.
  auto Decomposed = TriviaLexer::lexTrivia(RawTrivia);

  // Step 3: append the new pieces to the caller's list.
  auto &Destination = Pieces.Pieces;
  Destination.insert(Destination.end(), Decomposed.Pieces.begin(),
                     Decomposed.Pieces.end());
}
2539+
2540+
StringRef Lexer::lexTrivia(bool IsForTrailingTrivia) {
2541+
const char *AllTriviaStart = CurPtr;
2542+
25342543
Restart:
25352544
const char *TriviaStart = CurPtr;
25362545

@@ -2539,30 +2548,19 @@ void Lexer::lexTrivia(ParsedTrivia &Pieces, bool IsForTrailingTrivia) {
25392548
if (IsForTrailingTrivia)
25402549
break;
25412550
NextToken.setAtStartOfLine(true);
2542-
Pieces.appendOrSquash(TriviaKind::Newline, 1);
25432551
goto Restart;
25442552
case '\r':
25452553
if (IsForTrailingTrivia)
25462554
break;
25472555
NextToken.setAtStartOfLine(true);
25482556
if (CurPtr[0] == '\n') {
2549-
Pieces.appendOrSquash(TriviaKind::CarriageReturnLineFeed, 2);
25502557
++CurPtr;
2551-
} else {
2552-
Pieces.appendOrSquash(TriviaKind::CarriageReturn, 1);
25532558
}
25542559
goto Restart;
25552560
case ' ':
2556-
Pieces.appendOrSquash(TriviaKind::Space, 1);
2557-
goto Restart;
25582561
case '\t':
2559-
Pieces.appendOrSquash(TriviaKind::Tab, 1);
2560-
goto Restart;
25612562
case '\v':
2562-
Pieces.appendOrSquash(TriviaKind::VerticalTab, 1);
2563-
goto Restart;
25642563
case '\f':
2565-
Pieces.appendOrSquash(TriviaKind::Formfeed, 1);
25662564
goto Restart;
25672565
case '/':
25682566
if (IsForTrailingTrivia || isKeepingComments()) {
@@ -2571,19 +2569,11 @@ void Lexer::lexTrivia(ParsedTrivia &Pieces, bool IsForTrailingTrivia) {
25712569
break;
25722570
} else if (*CurPtr == '/') {
25732571
// '// ...' comment.
2574-
bool isDocComment = CurPtr[1] == '/';
25752572
skipSlashSlashComment(/*EatNewline=*/false);
2576-
size_t Length = CurPtr - TriviaStart;
2577-
Pieces.push_back(isDocComment ? TriviaKind::DocLineComment
2578-
: TriviaKind::LineComment, Length);
25792573
goto Restart;
25802574
} else if (*CurPtr == '*') {
25812575
// '/* ... */' comment.
2582-
bool isDocComment = CurPtr[1] == '*';
25832576
skipSlashStarComment();
2584-
size_t Length = CurPtr - TriviaStart;
2585-
Pieces.push_back(isDocComment ? TriviaKind::DocBlockComment
2586-
: TriviaKind::BlockComment, Length);
25872577
goto Restart;
25882578
}
25892579
break;
@@ -2594,26 +2584,20 @@ void Lexer::lexTrivia(ParsedTrivia &Pieces, bool IsForTrailingTrivia) {
25942584
if (!IsHashbangAllowed)
25952585
diagnose(TriviaStart, diag::lex_hashbang_not_allowed);
25962586
skipHashbang(/*EatNewline=*/false);
2597-
size_t Length = CurPtr - TriviaStart;
2598-
Pieces.push_back(TriviaKind::GarbageText, Length);
25992587
goto Restart;
26002588
}
26012589
break;
26022590
case '<':
26032591
case '>':
26042592
if (tryLexConflictMarker(/*EatNewline=*/false)) {
26052593
// Conflict marker.
2606-
size_t Length = CurPtr - TriviaStart;
2607-
Pieces.push_back(TriviaKind::GarbageText, Length);
26082594
goto Restart;
26092595
}
26102596
break;
26112597
case 0:
26122598
switch (getNulCharacterKind(CurPtr - 1)) {
26132599
case NulCharacterKind::Embedded: {
26142600
diagnoseEmbeddedNul(Diags, CurPtr - 1);
2615-
size_t Length = CurPtr - TriviaStart;
2616-
Pieces.push_back(TriviaKind::GarbageText, Length);
26172601
goto Restart;
26182602
}
26192603
case NulCharacterKind::CodeCompletion:
@@ -2655,15 +2639,15 @@ void Lexer::lexTrivia(ParsedTrivia &Pieces, bool IsForTrailingTrivia) {
26552639
bool ShouldTokenize = lexUnknown(/*EmitDiagnosticsIfToken=*/false);
26562640
if (ShouldTokenize) {
26572641
CurPtr = Tmp;
2658-
return;
2642+
size_t Length = CurPtr - AllTriviaStart;
2643+
return StringRef(AllTriviaStart, Length);
26592644
}
2660-
2661-
size_t Length = CurPtr - TriviaStart;
2662-
Pieces.push_back(TriviaKind::GarbageText, Length);
26632645
goto Restart;
26642646
}
26652647
// Reset the cursor.
26662648
--CurPtr;
2649+
size_t Length = CurPtr - AllTriviaStart;
2650+
return StringRef(AllTriviaStart, Length);
26672651
}
26682652

26692653
SourceLoc Lexer::getLocForEndOfToken(const SourceManager &SM, SourceLoc Loc) {
@@ -2846,6 +2830,158 @@ StringRef Lexer::getIndentationForLine(SourceManager &SM, SourceLoc Loc,
28462830
return StringRef(StartOfLine, EndOfIndentation - StartOfLine);
28472831
}
28482832

2833+
bool tryAdvanceToEndOfConflictMarker(const char *&CurPtr,
2834+
const char *BufferEnd) {
2835+
const char *Ptr = CurPtr - 1;
2836+
2837+
// Check to see if we have <<<<<<< or >>>>.
2838+
StringRef restOfBuffer(Ptr, BufferEnd - Ptr);
2839+
if (!restOfBuffer.startswith("<<<<<<< ") && !restOfBuffer.startswith(">>>> "))
2840+
return false;
2841+
2842+
ConflictMarkerKind Kind =
2843+
*Ptr == '<' ? ConflictMarkerKind::Normal : ConflictMarkerKind::Perforce;
2844+
if (const char *End = findConflictEnd(Ptr, BufferEnd, Kind)) {
2845+
CurPtr = End;
2846+
2847+
// Skip ahead to the end of the marker.
2848+
if (CurPtr != BufferEnd) {
2849+
advanceToEndOfLine(CurPtr, End);
2850+
}
2851+
2852+
return true;
2853+
}
2854+
2855+
// No end of conflict marker found.
2856+
return false;
2857+
}
2858+
2859+
/// Decompose the trivia in \p TriviaStr into its pieces.
///
/// Runs of newlines, carriage returns, spaces, tabs, vertical tabs and form
/// feeds are squashed into a single piece each. Line and block comments
/// become (doc) comment pieces. Hashbang lines, conflict markers, NUL
/// characters and anything else that is not recognised become GarbageText
/// pieces.
///
/// All look-ahead is bounds-checked against the end of \p TriviaStr, so the
/// classification never depends on characters that follow the string in
/// memory (a StringRef is not guaranteed to be NUL-terminated).
///
/// \param TriviaStr the raw trivia text to split.
/// \returns the trivia pieces; their combined length equals the size of
///          \p TriviaStr (asserted before returning).
ParsedTrivia TriviaLexer::lexTrivia(StringRef TriviaStr) {
  const char *CurPtr = TriviaStr.begin();
  const char *BufferEnd = TriviaStr.end();

  // Returns true if the character \p Offset positions after CurPtr is still
  // inside TriviaStr and equals \p Expected.
  auto PeekIs = [&](int Offset, char Expected) {
    return (BufferEnd - CurPtr) > Offset && CurPtr[Offset] == Expected;
  };

  // Returns true if \p C can start a piece that the main switch below
  // recognises; a run of garbage text ends in front of such a character.
  auto IsNextPieceStart = [](char C) {
    switch (C) {
    case '\n':
    case '\r':
    case ' ':
    case '\t':
    case '\v':
    case '\f':
    case '/':
    case 0:
      return true;
    default:
      return false;
    }
  };

  ParsedTrivia Pieces;

  while (CurPtr < BufferEnd) {
    // Iterate through the trivia and lex them into pieces. In the switch
    // statement in this loop we can
    //  - 'continue' if we have successfully lexed a trivia piece to continue
    //    with the next piece. In this case CurPtr points to the next character
    //    to be lexed (which is not part of the lexed trivia).
    //  - 'break' to perform the default handling defined towards the bottom of
    //    the loop.

    const char *TriviaStart = CurPtr;
    const char CurChar = *CurPtr++;

    switch (CurChar) {
    case '\n':
      Pieces.appendOrSquash(TriviaKind::Newline, 1);
      continue;
    case '\r':
      // Only treat "\r\n" as one piece if the '\n' belongs to TriviaStr.
      if (PeekIs(0, '\n')) {
        Pieces.appendOrSquash(TriviaKind::CarriageReturnLineFeed, 2);
        ++CurPtr;
      } else {
        Pieces.appendOrSquash(TriviaKind::CarriageReturn, 1);
      }
      continue;
    case ' ':
      Pieces.appendOrSquash(TriviaKind::Space, 1);
      continue;
    case '\t':
      Pieces.appendOrSquash(TriviaKind::Tab, 1);
      continue;
    case '\v':
      Pieces.appendOrSquash(TriviaKind::VerticalTab, 1);
      continue;
    case '\f':
      Pieces.appendOrSquash(TriviaKind::Formfeed, 1);
      continue;
    case '/':
      if (PeekIs(0, '/')) {
        // '// ...' comment. A third '/' makes it a doc comment.
        const bool isDocComment = PeekIs(1, '/');
        advanceToEndOfLine(CurPtr, BufferEnd);
        size_t Length = CurPtr - TriviaStart;
        Pieces.push_back(isDocComment ? TriviaKind::DocLineComment
                                      : TriviaKind::LineComment,
                         Length);
        continue;
      }
      if (PeekIs(0, '*')) {
        // '/* ... */' comment. A second '*' makes it a doc comment.
        const bool isDocComment = PeekIs(1, '*');
        skipToEndOfSlashStarComment(CurPtr, BufferEnd);
        size_t Length = CurPtr - TriviaStart;
        Pieces.push_back(isDocComment ? TriviaKind::DocBlockComment
                                      : TriviaKind::BlockComment,
                         Length);
        continue;
      }
      break;
    case '#':
      if (PeekIs(0, '!')) {
        // Hashbang '#!/path/to/swift'.
        advanceToEndOfLine(CurPtr, BufferEnd);
        size_t Length = CurPtr - TriviaStart;
        Pieces.push_back(TriviaKind::GarbageText, Length);
        continue;
      }
      break;
    case '<':
    case '>':
      if (tryAdvanceToEndOfConflictMarker(CurPtr, BufferEnd)) {
        // Conflict marker.
        size_t Length = CurPtr - TriviaStart;
        Pieces.push_back(TriviaKind::GarbageText, Length);
        continue;
      }
      break;
    case 0: {
      // A NUL character inside the trivia is recorded as garbage on its own.
      size_t Length = CurPtr - TriviaStart;
      Pieces.push_back(TriviaKind::GarbageText, Length);
      continue;
    }
    default:
      break;
    }

    // Default handling for anything that didn't 'continue' in the above switch
    // statement: everything up to the next character that may start a
    // recognised piece (or the end of the string) is one garbage piece. The
    // character consumed above is always part of it, so progress is
    // guaranteed.
    while (CurPtr < BufferEnd && !IsNextPieceStart(*CurPtr))
      ++CurPtr;

    size_t Length = CurPtr - TriviaStart;
    Pieces.push_back(TriviaKind::GarbageText, Length);
  }

  assert(Pieces.getLength() == TriviaStr.size() &&
         "Not all characters in the source string have been used in trivia "
         "pieces");
  return Pieces;
}
2984+
28492985
ArrayRef<Token> swift::
28502986
slice_token_array(ArrayRef<Token> AllTokens, SourceLoc StartLoc,
28512987
SourceLoc EndLoc) {

0 commit comments

Comments
 (0)