Skip to content

Commit 4146793

Browse files
committed
[clang][refactor] Move utilities from clang_tidy::LexerUtils to clang::Lexer
This makes them more widely available and reusable. Unit tests are also added.
1 parent 71d6287 commit 4146793

File tree

4 files changed

+70
-17
lines changed

4 files changed

+70
-17
lines changed

clang-tools-extra/clang-tidy/utils/LexerUtils.cpp

Lines changed: 8 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -17,25 +17,16 @@ namespace clang::tidy::utils::lexer {
1717
std::pair<Token, SourceLocation>
1818
getPreviousTokenAndStart(SourceLocation Location, const SourceManager &SM,
1919
const LangOptions &LangOpts, bool SkipComments) {
20-
Token Token;
21-
Token.setKind(tok::unknown);
20+
const std::optional<Token> Tok =
21+
Lexer::findPreviousToken(Location, SM, LangOpts, !SkipComments);
2222

23-
Location = Location.getLocWithOffset(-1);
24-
if (Location.isInvalid())
25-
return {Token, Location};
26-
27-
const auto StartOfFile = SM.getLocForStartOfFile(SM.getFileID(Location));
28-
while (Location != StartOfFile) {
29-
Location = Lexer::GetBeginningOfToken(Location, SM, LangOpts);
30-
if (!Lexer::getRawToken(Location, Token, SM, LangOpts) &&
31-
(!SkipComments || !Token.is(tok::comment))) {
32-
break;
33-
}
34-
if (Location == StartOfFile)
35-
return {Token, Location};
36-
Location = Location.getLocWithOffset(-1);
23+
if (Tok.has_value()) {
24+
return {*Tok, Lexer::GetBeginningOfToken(Tok->getLocation(), SM, LangOpts)};
3725
}
38-
return {Token, Location};
26+
27+
Token Token;
28+
Token.setKind(tok::unknown);
29+
return {Token, SourceLocation()};
3930
}
4031

4132
Token getPreviousToken(SourceLocation Location, const SourceManager &SM,

clang/include/clang/Lex/Lexer.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -557,6 +557,12 @@ class Lexer : public PreprocessorLexer {
557557
const LangOptions &LangOpts,
558558
bool IncludeComments = false);
559559

560+
/// Finds the token before the given location; std::nullopt if there is none.
561+
static std::optional<Token> findPreviousToken(SourceLocation Loc,
562+
const SourceManager &SM,
563+
const LangOptions &LangOpts,
564+
bool IncludeComments);
565+
560566
/// Checks that the given token is the first token that occurs after
561567
/// the given location (this excludes comments and whitespace). Returns the
562568
/// location immediately after the specified token. If the token is not found

clang/lib/Lex/Lexer.cpp

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1352,6 +1352,27 @@ std::optional<Token> Lexer::findNextToken(SourceLocation Loc,
13521352
return Tok;
13531353
}
13541354

1355+
std::optional<Token> Lexer::findPreviousToken(SourceLocation Loc,
1356+
const SourceManager &SM,
1357+
const LangOptions &LangOpts,
1358+
bool IncludeComments) {
1359+
const auto StartOfFile = SM.getLocForStartOfFile(SM.getFileID(Loc));
1360+
while (Loc != StartOfFile) {
1361+
Loc = Loc.getLocWithOffset(-1);
1362+
if (Loc.isInvalid())
1363+
return std::nullopt;
1364+
1365+
Loc = GetBeginningOfToken(Loc, SM, LangOpts);
1366+
Token Tok;
1367+
if (getRawToken(Loc, Tok, SM, LangOpts))
1368+
continue; // Not a token, go to prev location.
1369+
if (!Tok.is(tok::comment) || IncludeComments) {
1370+
return Tok;
1371+
}
1372+
}
1373+
return std::nullopt;
1374+
}
1375+
13551376
/// Checks that the given token is the first token that occurs after the
13561377
/// given location (this excludes comments and whitespace). Returns the location
13571378
/// immediately after the specified token. If the token is not found or the

clang/unittests/Lex/LexerTest.cpp

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -640,6 +640,41 @@ TEST_F(LexerTest, FindNextTokenIncludingComments) {
640640
"=", "abcd", ";"));
641641
}
642642

643+
TEST_F(LexerTest, FindPreviousToken) {
644+
Lex("int abcd = 0;\n"
645+
"// A comment.\n"
646+
"int xyz = abcd;\n");
647+
std::vector<std::string> GeneratedByPrevToken;
648+
SourceLocation Loc = SourceMgr.getLocForEndOfFile(SourceMgr.getMainFileID());
649+
while (true) {
650+
auto T = Lexer::findPreviousToken(Loc, SourceMgr, LangOpts, false);
651+
if (!T.has_value())
652+
break;
653+
GeneratedByPrevToken.push_back(getSourceText(*T, *T));
654+
Loc = Lexer::GetBeginningOfToken(T->getLocation(), SourceMgr, LangOpts);
655+
}
656+
EXPECT_THAT(GeneratedByPrevToken, ElementsAre(";", "abcd", "=", "xyz", "int",
657+
";", "0", "=", "abcd", "int"));
658+
}
659+
660+
TEST_F(LexerTest, FindPreviousTokenIncludingComments) {
661+
Lex("int abcd = 0;\n"
662+
"// A comment.\n"
663+
"int xyz = abcd;\n");
664+
std::vector<std::string> GeneratedByPrevToken;
665+
SourceLocation Loc = SourceMgr.getLocForEndOfFile(SourceMgr.getMainFileID());
666+
while (true) {
667+
auto T = Lexer::findPreviousToken(Loc, SourceMgr, LangOpts, true);
668+
if (!T.has_value())
669+
break;
670+
GeneratedByPrevToken.push_back(getSourceText(*T, *T));
671+
Loc = Lexer::GetBeginningOfToken(T->getLocation(), SourceMgr, LangOpts);
672+
}
673+
EXPECT_THAT(GeneratedByPrevToken,
674+
ElementsAre(";", "abcd", "=", "xyz", "int", "// A comment.", ";",
675+
"0", "=", "abcd", "int"));
676+
}
677+
643678
TEST_F(LexerTest, CreatedFIDCountForPredefinedBuffer) {
644679
TrivialModuleLoader ModLoader;
645680
auto PP = CreatePP("", ModLoader);

0 commit comments

Comments
 (0)