-
Notifications
You must be signed in to change notification settings - Fork 15.3k
[clang][transformer] Add merge range-selector for selecting the merge of two ranges.
#169560
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Conversation
|
@llvm/pr-subscribers-clang Author: Yu Hao (yuhaouy) ChangesThis new range-selector The existing Full diff: https://github.com/llvm/llvm-project/pull/169560.diff 4 Files Affected:
diff --git a/clang/include/clang/Tooling/Transformer/RangeSelector.h b/clang/include/clang/Tooling/Transformer/RangeSelector.h
index 462a9da8f10eb..c76a5106edd65 100644
--- a/clang/include/clang/Tooling/Transformer/RangeSelector.h
+++ b/clang/include/clang/Tooling/Transformer/RangeSelector.h
@@ -37,6 +37,10 @@ RangeSelector enclose(RangeSelector Begin, RangeSelector End);
/// Convenience version of \c range where end-points are bound nodes.
RangeSelector encloseNodes(std::string BeginID, std::string EndID);
+/// Selects the merge of the two ranges, i.e. from min(First.begin,
+/// Second.begin) to max(First.end, Second.end).
+RangeSelector merge(RangeSelector First, RangeSelector Second);
+
/// DEPRECATED. Use `enclose`.
inline RangeSelector range(RangeSelector Begin, RangeSelector End) {
return enclose(std::move(Begin), std::move(End));
diff --git a/clang/lib/Tooling/Transformer/Parsing.cpp b/clang/lib/Tooling/Transformer/Parsing.cpp
index 19a1c7c66df46..f7bffda6967a9 100644
--- a/clang/lib/Tooling/Transformer/Parsing.cpp
+++ b/clang/lib/Tooling/Transformer/Parsing.cpp
@@ -108,7 +108,7 @@ getBinaryStringSelectors() {
static const llvm::StringMap<RangeSelectorOp<RangeSelector, RangeSelector>> &
getBinaryRangeSelectors() {
static const llvm::StringMap<RangeSelectorOp<RangeSelector, RangeSelector>>
- M = {{"enclose", enclose}, {"between", between}};
+ M = {{"enclose", enclose}, {"between", between}, {"merge", merge}};
return M;
}
diff --git a/clang/lib/Tooling/Transformer/RangeSelector.cpp b/clang/lib/Tooling/Transformer/RangeSelector.cpp
index 54a1590d3106d..948cf9c7e712f 100644
--- a/clang/lib/Tooling/Transformer/RangeSelector.cpp
+++ b/clang/lib/Tooling/Transformer/RangeSelector.cpp
@@ -178,6 +178,56 @@ RangeSelector transformer::encloseNodes(std::string BeginID,
return transformer::enclose(node(std::move(BeginID)), node(std::move(EndID)));
}
+RangeSelector transformer::merge(RangeSelector First, RangeSelector Second) {
+ return [First,
+ Second](const MatchResult &Result) -> Expected<CharSourceRange> {
+ Expected<CharSourceRange> FirstRange = First(Result);
+ if (!FirstRange)
+ return FirstRange.takeError();
+ Expected<CharSourceRange> SecondRange = Second(Result);
+ if (!SecondRange)
+ return SecondRange.takeError();
+ // Result begin loc is the minimum of the begin locs of the two ranges.
+ SourceLocation B = FirstRange->getBegin() < SecondRange->getBegin()
+ ? FirstRange->getBegin()
+ : SecondRange->getBegin();
+ if (FirstRange->isTokenRange() && SecondRange->isTokenRange()) {
+ // Both ranges are token ranges. Just take the maximum of their end locs.
+ SourceLocation E = FirstRange->getEnd() > SecondRange->getEnd()
+ ? FirstRange->getEnd()
+ : SecondRange->getEnd();
+ return CharSourceRange::getTokenRange(B, E);
+ }
+ SourceLocation FirstE = FirstRange->getEnd();
+ if (FirstRange->isTokenRange()) {
+ // The end of the first range is a token. Need to resolve the token to a
+ // char range.
+ CharSourceRange EndRange = Lexer::makeFileCharRange(
+ CharSourceRange::getTokenRange(FirstRange->getEnd()),
+ *Result.SourceManager, Result.Context->getLangOpts());
+ if (EndRange.isInvalid())
+ return invalidArgumentError(
+ "merge: can't resolve first token range to valid source range");
+ FirstE = EndRange.getEnd();
+ }
+ SourceLocation SecondE = SecondRange->getEnd();
+ if (SecondRange->isTokenRange()) {
+ // The end of the second range is a token. Need to resolve the token to a
+ // char range.
+ CharSourceRange EndRange = Lexer::makeFileCharRange(
+ CharSourceRange::getTokenRange(SecondRange->getEnd()),
+ *Result.SourceManager, Result.Context->getLangOpts());
+ if (EndRange.isInvalid())
+ return invalidArgumentError(
+ "merge: can't resolve second token range to valid source range");
+ SecondE = EndRange.getEnd();
+ }
+ // Result end loc is the maximum of the end locs of the two ranges.
+ SourceLocation E = FirstE > SecondE ? FirstE : SecondE;
+ return CharSourceRange::getCharRange(B, E);
+ };
+}
+
RangeSelector transformer::member(std::string ID) {
return [ID](const MatchResult &Result) -> Expected<CharSourceRange> {
Expected<DynTypedNode> Node = getNode(Result.Nodes, ID);
diff --git a/clang/unittests/Tooling/RangeSelectorTest.cpp b/clang/unittests/Tooling/RangeSelectorTest.cpp
index d441da165b09b..9feca3ca0bed8 100644
--- a/clang/unittests/Tooling/RangeSelectorTest.cpp
+++ b/clang/unittests/Tooling/RangeSelectorTest.cpp
@@ -327,6 +327,38 @@ TEST(RangeSelectorTest, EncloseOpGeneralParsed) {
EXPECT_THAT_EXPECTED(select(*R, Match), HasValue("3, 7"));
}
+TEST(RangeSelectorTest, MergeOp) {
+ StringRef Code = R"cc(
+ int f(int x, int y, int z) { return 3; }
+ int g() { return f(/* comment */ 3, 7 /* comment */, 9); }
+ )cc";
+ auto Matcher = callExpr(hasArgument(0, expr().bind("a0")),
+ hasArgument(1, expr().bind("a1")),
+ hasArgument(2, expr().bind("a2")));
+ RangeSelector R = merge(node("a0"), node("a1"));
+ TestMatch Match = matchCode(Code, Matcher);
+ EXPECT_THAT_EXPECTED(select(R, Match), HasValue("3, 7"));
+ R = merge(node("a2"), node("a1"));
+ EXPECT_THAT_EXPECTED(select(R, Match), HasValue("7 /* comment */, 9"));
+}
+
+TEST(RangeSelectorTest, MergeOpParsed) {
+ StringRef Code = R"cc(
+ int f(int x, int y, int z) { return 3; }
+ int g() { return f(/* comment */ 3, 7 /* comment */, 9); }
+ )cc";
+ auto Matcher = callExpr(hasArgument(0, expr().bind("a0")),
+ hasArgument(1, expr().bind("a1")),
+ hasArgument(2, expr().bind("a2")));
+ auto R = parseRangeSelector(R"rs(merge(node("a0"), node("a1")))rs");
+ ASSERT_THAT_EXPECTED(R, llvm::Succeeded());
+ TestMatch Match = matchCode(Code, Matcher);
+ EXPECT_THAT_EXPECTED(select(*R, Match), HasValue("3, 7"));
+ R = parseRangeSelector(R"rs(merge(node("a2"), node("a1")))rs");
+ ASSERT_THAT_EXPECTED(R, llvm::Succeeded());
+ EXPECT_THAT_EXPECTED(select(*R, Match), HasValue("7 /* comment */, 9"));
+}
+
TEST(RangeSelectorTest, NodeOpStatement) {
StringRef Code = "int f() { return 3; }";
TestMatch Match = matchCode(Code, returnStmt().bind("id"));
|
ymand
left a comment
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
API addition looks, but some questions about implementation given the subtleties of ranges :(
| if (!SecondRange) | ||
| return SecondRange.takeError(); | ||
| // Result begin loc is the minimum of the begin locs of the two ranges. | ||
| SourceLocation B = FirstRange->getBegin() < SecondRange->getBegin() |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think we generally don't compare source ranges when we care about semantic details. Instead, we use APIs like isBeforeInTranslationUnit. I don't remember the subtleties offhand, but that's what we use e.g. on line 169 and should be a good start at least. I would recommend you look at the declaration and implementation of makeFileCharRange to get a sense for the subtleties involved here.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Thanks! Fixed.
| if (FirstRange->isTokenRange()) { | ||
| // The end of the first range is a token. Need to resolve the token to a | ||
| // char range. | ||
| CharSourceRange EndRange = Lexer::makeFileCharRange( |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Would Lexer::getLocForEndOfToken be enough here?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think so. Was using makeFileCharRange because after at line 120 uses it. Changed to use getLocForEndOfToken .
This new range-selector
mergetakes in two ranges and selects from min(begin locs of input ranges) to max(end locs of input ranges). This is useful for when the user needs to select a range that is a merge of two arbitrary ranges (potentially overlapped and out of order).The existing
encloserange-selector does something similar but it requires the first range's begin loc appears before the second range's end loc. Themergerange-selector complementsenclose.