Skip to content

Commit 7594cfb

Browse files
committed
[Parse] Update Lexer::getLocFor*Line methods to return correct locations
When the location given to getLocForStartOfLine was on an empty line, it would actually return the location of the next line rather than the given location, as it should. If the location given to getLocForEndOfLine was inside a token on a line that was either empty or contained only whitespace, it would skip to the end of that token and then return the location for the next line. This was an issue for multiline strings, where the string is a single token that spans multiple lines.
1 parent 671be54 commit 7594cfb

File tree

2 files changed

+13
-16
lines changed

2 files changed

+13
-16
lines changed

include/swift/Parse/Lexer.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -352,7 +352,7 @@ class Lexer {
352352
static SourceLoc getLocForStartOfLine(SourceManager &SM, SourceLoc Loc);
353353

354354
/// Retrieve the source location for the end of the line containing the
355-
/// given token, which is the location of the start of the next line.
355+
/// given location, which is the location of the start of the next line.
356356
static SourceLoc getLocForEndOfLine(SourceManager &SM, SourceLoc Loc);
357357

358358
/// Retrieve the string used to indent the line that contains the given

lib/Parse/Lexer.cpp

Lines changed: 12 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -2706,12 +2706,12 @@ static SourceLoc getLocForStartOfTokenInBuf(SourceManager &SM,
27062706
// Find the start of the given line.
27072707
static const char *findStartOfLine(const char *bufStart, const char *current) {
27082708
while (current != bufStart) {
2709-
if (current[0] == '\n' || current[0] == '\r') {
2709+
--current;
2710+
2711+
if (current[0] == '\n') {
27102712
++current;
27112713
break;
27122714
}
2713-
2714-
--current;
27152715
}
27162716

27172717
return current;
@@ -2779,19 +2779,16 @@ SourceLoc Lexer::getLocForEndOfLine(SourceManager &SM, SourceLoc Loc) {
27792779
if (BufferID < 0)
27802780
return SourceLoc();
27812781

2782-
// Use fake language options; language options only affect validity
2783-
// and the exact token produced.
2784-
LangOptions FakeLangOpts;
2782+
CharSourceRange entireRange = SM.getRangeForBuffer(BufferID);
2783+
StringRef Buffer = SM.extractText(entireRange);
27852784

2786-
// Here we return comments as tokens because either the caller skipped
2787-
// comments and normally we won't be at the beginning of a comment token
2788-
// (making this option irrelevant), or the caller lexed comments and
2789-
// we need to lex just the comment token.
2790-
Lexer L(FakeLangOpts, SM, BufferID, nullptr, LexerMode::Swift,
2791-
HashbangMode::Allowed, CommentRetentionMode::ReturnAsTokens);
2792-
L.restoreState(State(Loc));
2793-
L.skipToEndOfLine(/*EatNewline=*/true);
2794-
return getSourceLoc(L.CurPtr);
2785+
// Windows line endings are \r\n. Since we want the start of the next
2786+
// line, just look for \n so the \r is skipped through.
2787+
size_t Offset = SM.getLocOffsetInBuffer(Loc, BufferID);
2788+
Offset = Buffer.find('\n', Offset);
2789+
if (Offset == StringRef::npos)
2790+
return SourceLoc();
2791+
return getSourceLoc(Buffer.data() + Offset + 1);
27952792
}
27962793

27972794
StringRef Lexer::getIndentationForLine(SourceManager &SM, SourceLoc Loc,

0 commit comments

Comments
 (0)