Skip to content

Commit 58857fa

Browse files
committed
[Parse] Refactor Lexer by introducing a NulCharacterKind type
1 parent 26c08fb commit 58857fa

File tree

2 files changed

+66
-36
lines changed

2 files changed

+66
-36
lines changed

include/swift/Parse/Lexer.h

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -461,6 +461,16 @@ class Lexer {
461461
};
462462

463463
private:
464+
/// Classifies what a nul character in the buffer means.
465+
enum class NulCharacterKind {
466+
/// String buffer terminator.
467+
BufferEnd,
468+
/// Embedded nul character.
469+
Embedded,
470+
/// Code completion marker.
471+
CodeCompletion
472+
};
473+
464474
/// For a source location in the current buffer, returns the corresponding
465475
/// pointer.
466476
const char *getBufferPtrForSourceLoc(SourceLoc Loc) const {
@@ -520,6 +530,8 @@ class Lexer {
520530
/// Try to lex conflict markers by checking for the presence of the start and
521531
/// end of the marker in diff3 or Perforce style respectively.
522532
bool tryLexConflictMarker(bool EatNewline);
533+
534+
NulCharacterKind getNulCharacterKind(const char *Ptr) const;
523535
};
524536

525537
/// Given an ordered token \param Array , get the iterator pointing to the first

lib/Parse/Lexer.cpp

Lines changed: 54 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -351,16 +351,19 @@ void Lexer::skipToEndOfLine(bool EatNewline) {
351351
}
352352
break; // Otherwise, eat other characters.
353353
case 0:
354-
// If this is a random nul character in the middle of a buffer, skip it as
355-
// whitespace.
356-
if (CurPtr-1 != BufferEnd) {
354+
switch (getNulCharacterKind(CurPtr - 1)) {
355+
case NulCharacterKind::Embedded:
356+
// If this is a random nul character in the middle of a buffer, skip it
357+
// as whitespace.
357358
diagnoseEmbeddedNul(Diags, CurPtr-1);
358-
break;
359+
LLVM_FALLTHROUGH;
360+
case NulCharacterKind::CodeCompletion:
361+
continue;
362+
case NulCharacterKind::BufferEnd:
363+
// Otherwise, the last line of the file does not have a newline.
364+
--CurPtr;
365+
return;
359366
}
360-
361-
// Otherwise, the last line of the file does not have a newline.
362-
--CurPtr;
363-
return;
364367
}
365368
}
366369
}
@@ -422,26 +425,30 @@ void Lexer::skipSlashStarComment() {
422425

423426
break; // Otherwise, eat other characters.
424427
case 0:
425-
// If this is a random nul character in the middle of a buffer, skip it as
426-
// whitespace.
427-
if (CurPtr-1 != BufferEnd) {
428-
diagnoseEmbeddedNul(Diags, CurPtr-1);
429-
break;
430-
}
431-
432-
// Otherwise, we have an unterminated /* comment.
433-
--CurPtr;
428+
switch (getNulCharacterKind(CurPtr - 1)) {
429+
case NulCharacterKind::Embedded:
430+
// If this is a random nul character in the middle of a buffer, skip it
431+
// as whitespace.
432+
diagnoseEmbeddedNul(Diags, CurPtr - 1);
433+
LLVM_FALLTHROUGH;
434+
case NulCharacterKind::CodeCompletion:
435+
continue;
436+
case NulCharacterKind::BufferEnd: {
437+
// Otherwise, we have an unterminated /* comment.
438+
--CurPtr;
434439

435-
// Count how many levels deep we are.
436-
llvm::SmallString<8> Terminator("*/");
437-
while (--Depth != 0)
438-
Terminator += "*/";
440+
// Count how many levels deep we are.
441+
llvm::SmallString<8> Terminator("*/");
442+
while (--Depth != 0)
443+
Terminator += "*/";
439444

440-
const char *EOL = (CurPtr[-1] == '\n') ? (CurPtr - 1) : CurPtr;
441-
diagnose(EOL, diag::lex_unterminated_block_comment)
442-
.fixItInsert(getSourceLoc(EOL), Terminator);
443-
diagnose(StartPtr, diag::lex_comment_start);
444-
return;
445+
const char *EOL = (CurPtr[-1] == '\n') ? (CurPtr - 1) : CurPtr;
446+
diagnose(EOL, diag::lex_unterminated_block_comment)
447+
.fixItInsert(getSourceLoc(EOL), Terminator);
448+
diagnose(StartPtr, diag::lex_comment_start);
449+
return;
450+
}
451+
}
445452
}
446453
}
447454
}
@@ -1857,6 +1864,16 @@ bool Lexer::tryLexConflictMarker(bool EatNewline) {
18571864
return false;
18581865
}
18591866

1867+
Lexer::NulCharacterKind Lexer::getNulCharacterKind(const char *Ptr) const {
1868+
assert(Ptr != nullptr && *Ptr == 0);
1869+
if (Ptr == CodeCompletionPtr) {
1870+
return NulCharacterKind::CodeCompletion;
1871+
}
1872+
if (Ptr == BufferEnd) {
1873+
return NulCharacterKind::BufferEnd;
1874+
}
1875+
return NulCharacterKind::Embedded;
1876+
}
18601877

18611878
void Lexer::tryLexEditorPlaceholder() {
18621879
assert(CurPtr[-1] == '<' && CurPtr[0] == '#');
@@ -2164,22 +2181,23 @@ void Lexer::lexImpl() {
21642181
return formToken(tok::unknown, TokStart);
21652182

21662183
case 0:
2167-
if (CurPtr-1 == CodeCompletionPtr)
2184+
switch (getNulCharacterKind(CurPtr - 1)) {
2185+
case NulCharacterKind::CodeCompletion:
21682186
return formToken(tok::code_complete, TokStart);
21692187

2170-
// If this is a random nul character in the middle of a buffer, skip it as
2171-
// whitespace.
2172-
if (CurPtr-1 != BufferEnd) {
2188+
case NulCharacterKind::Embedded:
2189+
// If this is a random nul character in the middle of a buffer, skip it as
2190+
// whitespace.
21732191
diagnoseEmbeddedNul(Diags, CurPtr-1);
21742192
goto Restart;
2193+
case NulCharacterKind::BufferEnd:
2194+
// Otherwise, this is the real end of the buffer. Put CurPtr back into
2195+
// buffer bounds.
2196+
--CurPtr;
2197+
// Return EOF.
2198+
return formToken(tok::eof, TokStart);
21752199
}
21762200

2177-
// Otherwise, this is the real end of the buffer. Put CurPtr back into
2178-
// buffer bounds.
2179-
--CurPtr;
2180-
// Return EOF.
2181-
return formToken(tok::eof, TokStart);
2182-
21832201
case '@': return formToken(tok::at_sign, TokStart);
21842202
case '{': return formToken(tok::l_brace, TokStart);
21852203
case '[': {

0 commit comments

Comments
 (0)