From d6a195fea92c992f85d3c9bae3fad379c01f61c1 Mon Sep 17 00:00:00 2001 From: Yu Hao Date: Mon, 24 Nov 2025 15:19:46 -0800 Subject: [PATCH 1/2] Add merge range-selector --- .../clang/Tooling/Transformer/RangeSelector.h | 4 ++ clang/lib/Tooling/Transformer/Parsing.cpp | 2 +- .../lib/Tooling/Transformer/RangeSelector.cpp | 50 +++++++++++++++++++ clang/unittests/Tooling/RangeSelectorTest.cpp | 32 ++++++++++++ 4 files changed, 87 insertions(+), 1 deletion(-) diff --git a/clang/include/clang/Tooling/Transformer/RangeSelector.h b/clang/include/clang/Tooling/Transformer/RangeSelector.h index 462a9da8f10eb..c76a5106edd65 100644 --- a/clang/include/clang/Tooling/Transformer/RangeSelector.h +++ b/clang/include/clang/Tooling/Transformer/RangeSelector.h @@ -37,6 +37,10 @@ RangeSelector enclose(RangeSelector Begin, RangeSelector End); /// Convenience version of \c range where end-points are bound nodes. RangeSelector encloseNodes(std::string BeginID, std::string EndID); +/// Selects the merge of the two ranges, i.e. from min(First.begin, +/// Second.begin) to max(First.end, Second.end). +RangeSelector merge(RangeSelector First, RangeSelector Second); + /// DEPRECATED. Use `enclose`. inline RangeSelector range(RangeSelector Begin, RangeSelector End) { return enclose(std::move(Begin), std::move(End)); diff --git a/clang/lib/Tooling/Transformer/Parsing.cpp b/clang/lib/Tooling/Transformer/Parsing.cpp index 19a1c7c66df46..f7bffda6967a9 100644 --- a/clang/lib/Tooling/Transformer/Parsing.cpp +++ b/clang/lib/Tooling/Transformer/Parsing.cpp @@ -108,7 +108,7 @@ getBinaryStringSelectors() { static const llvm::StringMap> & getBinaryRangeSelectors() { static const llvm::StringMap> - M = {{"enclose", enclose}, {"between", between}}; + M = {{"enclose", enclose}, {"between", between}, {"merge", merge}}; return M; } diff --git a/clang/lib/Tooling/Transformer/RangeSelector.cpp b/clang/lib/Tooling/Transformer/RangeSelector.cpp index 54a1590d3106d..948cf9c7e712f 100644 --- a/clang/lib/Tooling/Transformer/RangeSelector.cpp +++ b/clang/lib/Tooling/Transformer/RangeSelector.cpp @@ -178,6 +178,56 @@ RangeSelector transformer::encloseNodes(std::string BeginID, return transformer::enclose(node(std::move(BeginID)), node(std::move(EndID))); } +RangeSelector transformer::merge(RangeSelector First, RangeSelector Second) { + return [First, + Second](const MatchResult &Result) -> Expected { + Expected FirstRange = First(Result); + if (!FirstRange) + return FirstRange.takeError(); + Expected SecondRange = Second(Result); + if (!SecondRange) + return SecondRange.takeError(); + // Result begin loc is the minimum of the begin locs of the two ranges. + SourceLocation B = FirstRange->getBegin() < SecondRange->getBegin() + ? FirstRange->getBegin() + : SecondRange->getBegin(); + if (FirstRange->isTokenRange() && SecondRange->isTokenRange()) { + // Both ranges are token ranges. Just take the maximum of their end locs. + SourceLocation E = FirstRange->getEnd() > SecondRange->getEnd() + ? FirstRange->getEnd() + : SecondRange->getEnd(); + return CharSourceRange::getTokenRange(B, E); + } + SourceLocation FirstE = FirstRange->getEnd(); + if (FirstRange->isTokenRange()) { + // The end of the first range is a token. Need to resolve the token to a + // char range. + CharSourceRange EndRange = Lexer::makeFileCharRange( + CharSourceRange::getTokenRange(FirstRange->getEnd()), + *Result.SourceManager, Result.Context->getLangOpts()); + if (EndRange.isInvalid()) + return invalidArgumentError( + "merge: can't resolve first token range to valid source range"); + FirstE = EndRange.getEnd(); + } + SourceLocation SecondE = SecondRange->getEnd(); + if (SecondRange->isTokenRange()) { + // The end of the second range is a token. Need to resolve the token to a + // char range. + CharSourceRange EndRange = Lexer::makeFileCharRange( + CharSourceRange::getTokenRange(SecondRange->getEnd()), + *Result.SourceManager, Result.Context->getLangOpts()); + if (EndRange.isInvalid()) + return invalidArgumentError( + "merge: can't resolve second token range to valid source range"); + SecondE = EndRange.getEnd(); + } + // Result end loc is the maximum of the end locs of the two ranges. + SourceLocation E = FirstE > SecondE ? FirstE : SecondE; + return CharSourceRange::getCharRange(B, E); + }; +} + RangeSelector transformer::member(std::string ID) { return [ID](const MatchResult &Result) -> Expected { Expected Node = getNode(Result.Nodes, ID); diff --git a/clang/unittests/Tooling/RangeSelectorTest.cpp b/clang/unittests/Tooling/RangeSelectorTest.cpp index d441da165b09b..9feca3ca0bed8 100644 --- a/clang/unittests/Tooling/RangeSelectorTest.cpp +++ b/clang/unittests/Tooling/RangeSelectorTest.cpp @@ -327,6 +327,38 @@ TEST(RangeSelectorTest, EncloseOpGeneralParsed) { EXPECT_THAT_EXPECTED(select(*R, Match), HasValue("3, 7")); } +TEST(RangeSelectorTest, MergeOp) { + StringRef Code = R"cc( + int f(int x, int y, int z) { return 3; } + int g() { return f(/* comment */ 3, 7 /* comment */, 9); } + )cc"; + auto Matcher = callExpr(hasArgument(0, expr().bind("a0")), + hasArgument(1, expr().bind("a1")), + hasArgument(2, expr().bind("a2"))); + RangeSelector R = merge(node("a0"), node("a1")); + TestMatch Match = matchCode(Code, Matcher); + EXPECT_THAT_EXPECTED(select(R, Match), HasValue("3, 7")); + R = merge(node("a2"), node("a1")); + EXPECT_THAT_EXPECTED(select(R, Match), HasValue("7 /* comment */, 9")); +} + +TEST(RangeSelectorTest, MergeOpParsed) { + StringRef Code = R"cc( + int f(int x, int y, int z) { return 3; } + int g() { return f(/* comment */ 3, 7 /* comment */, 9); } + )cc"; + auto Matcher = callExpr(hasArgument(0, expr().bind("a0")), + hasArgument(1, expr().bind("a1")), + hasArgument(2, expr().bind("a2"))); + auto R = parseRangeSelector(R"rs(merge(node("a0"), node("a1")))rs"); + ASSERT_THAT_EXPECTED(R, llvm::Succeeded()); + TestMatch Match = matchCode(Code, Matcher); + EXPECT_THAT_EXPECTED(select(*R, Match), HasValue("3, 7")); + R = parseRangeSelector(R"rs(merge(node("a2"), node("a1")))rs"); + ASSERT_THAT_EXPECTED(R, llvm::Succeeded()); + EXPECT_THAT_EXPECTED(select(*R, Match), HasValue("7 /* comment */, 9")); +} + TEST(RangeSelectorTest, NodeOpStatement) { StringRef Code = "int f() { return 3; }"; TestMatch Match = matchCode(Code, returnStmt().bind("id")); From a9933031791f121631f00ee5d1836ad8aeb32e1d Mon Sep 17 00:00:00 2001 From: Yu Hao Date: Tue, 25 Nov 2025 17:58:34 -0800 Subject: [PATCH 2/2] Fix loc comparision and get end-of-token --- .../lib/Tooling/Transformer/RangeSelector.cpp | 45 +++++++++++-------- 1 file changed, 26 insertions(+), 19 deletions(-) diff --git a/clang/lib/Tooling/Transformer/RangeSelector.cpp b/clang/lib/Tooling/Transformer/RangeSelector.cpp index 948cf9c7e712f..68b16f91652fb 100644 --- a/clang/lib/Tooling/Transformer/RangeSelector.cpp +++ b/clang/lib/Tooling/Transformer/RangeSelector.cpp @@ -187,43 +187,50 @@ RangeSelector transformer::merge(RangeSelector First, RangeSelector Second) { Expected SecondRange = Second(Result); if (!SecondRange) return SecondRange.takeError(); + + SourceLocation FirstB = FirstRange->getBegin(); + SourceLocation FirstE = FirstRange->getEnd(); + SourceLocation SecondB = SecondRange->getBegin(); + SourceLocation SecondE = SecondRange->getEnd(); // Result begin loc is the minimum of the begin locs of the two ranges. - SourceLocation B = FirstRange->getBegin() < SecondRange->getBegin() - ? FirstRange->getBegin() - : SecondRange->getBegin(); + SourceLocation B = + Result.SourceManager->isBeforeInTranslationUnit(FirstB, SecondB) + ? FirstB + : SecondB; if (FirstRange->isTokenRange() && SecondRange->isTokenRange()) { // Both ranges are token ranges. Just take the maximum of their end locs. - SourceLocation E = FirstRange->getEnd() > SecondRange->getEnd() - ? FirstRange->getEnd() - : SecondRange->getEnd(); + SourceLocation E = + Result.SourceManager->isBeforeInTranslationUnit(FirstE, SecondE) + ? SecondE + : FirstE; return CharSourceRange::getTokenRange(B, E); } - SourceLocation FirstE = FirstRange->getEnd(); + if (FirstRange->isTokenRange()) { // The end of the first range is a token. Need to resolve the token to a // char range. - CharSourceRange EndRange = Lexer::makeFileCharRange( - CharSourceRange::getTokenRange(FirstRange->getEnd()), - *Result.SourceManager, Result.Context->getLangOpts()); - if (EndRange.isInvalid()) + FirstE = Lexer::getLocForEndOfToken(FirstE, /*Offset=*/0, + *Result.SourceManager, + Result.Context->getLangOpts()); + if (FirstE.isInvalid()) return invalidArgumentError( "merge: can't resolve first token range to valid source range"); - FirstE = EndRange.getEnd(); } - SourceLocation SecondE = SecondRange->getEnd(); if (SecondRange->isTokenRange()) { // The end of the second range is a token. Need to resolve the token to a // char range. - CharSourceRange EndRange = Lexer::makeFileCharRange( - CharSourceRange::getTokenRange(SecondRange->getEnd()), - *Result.SourceManager, Result.Context->getLangOpts()); - if (EndRange.isInvalid()) + SecondE = Lexer::getLocForEndOfToken(SecondE, /*Offset=*/0, + *Result.SourceManager, + Result.Context->getLangOpts()); + if (SecondE.isInvalid()) return invalidArgumentError( "merge: can't resolve second token range to valid source range"); - SecondE = EndRange.getEnd(); } // Result end loc is the maximum of the end locs of the two ranges. - SourceLocation E = FirstE > SecondE ? FirstE : SecondE; + SourceLocation E = + Result.SourceManager->isBeforeInTranslationUnit(FirstE, SecondE) + ? SecondE + : FirstE; return CharSourceRange::getCharRange(B, E); }; }