diff --git a/clang/lib/Lex/Lexer.cpp b/clang/lib/Lex/Lexer.cpp index e58c8bc72ae5b..72364500a48f9 100644 --- a/clang/lib/Lex/Lexer.cpp +++ b/clang/lib/Lex/Lexer.cpp @@ -527,7 +527,7 @@ bool Lexer::getRawToken(SourceLocation Loc, Token &Result, const char *StrData = Buffer.data()+LocInfo.second; - if (!IgnoreWhiteSpace && isWhitespace(StrData[0])) + if (!IgnoreWhiteSpace && isWhitespace(SkipEscapedNewLines(StrData)[0])) return true; // Create a lexer starting at the beginning of this token. diff --git a/clang/unittests/Lex/LexerTest.cpp b/clang/unittests/Lex/LexerTest.cpp index 47aa2c131a304..aead7fb899d0a 100644 --- a/clang/unittests/Lex/LexerTest.cpp +++ b/clang/unittests/Lex/LexerTest.cpp @@ -652,6 +652,38 @@ TEST_F(LexerTest, RawAndNormalLexSameForLineComments) { EXPECT_TRUE(ToksView.empty()); } +TEST_F(LexerTest, GetRawTokenOnEscapedNewLineChecksWhitespace) { + const llvm::StringLiteral Source = R"cc( + #define ONE \ + 1 + + int i = ONE; + )cc"; + std::vector Toks = + CheckLex(Source, {tok::kw_int, tok::identifier, tok::equal, + tok::numeric_constant, tok::semi}); + + // Set up by getting the raw token for the `1` in the macro definition. + const Token &OneExpanded = Toks[3]; + Token Tok; + ASSERT_FALSE( + Lexer::getRawToken(OneExpanded.getLocation(), Tok, SourceMgr, LangOpts)); + // The `ONE`. + ASSERT_EQ(Tok.getKind(), tok::raw_identifier); + ASSERT_FALSE( + Lexer::getRawToken(SourceMgr.getSpellingLoc(OneExpanded.getLocation()), + Tok, SourceMgr, LangOpts)); + // The `1` in the macro definition. + ASSERT_EQ(Tok.getKind(), tok::numeric_constant); + + // Go back 4 characters: two spaces, one newline, and the backslash. + SourceLocation EscapedNewLineLoc = Tok.getLocation().getLocWithOffset(-4); + // Expect true (=failure) because the whitespace immediately after the + // escaped newline is not ignored. + EXPECT_TRUE(Lexer::getRawToken(EscapedNewLineLoc, Tok, SourceMgr, LangOpts, + /*IgnoreWhiteSpace=*/false)); +} + TEST(LexerPreambleTest, PreambleBounds) { std::vector Cases = { R"cc([[