-
Notifications
You must be signed in to change notification settings - Fork 15.2k
[FuncAttrs] Relax norecurse attribute inference #139943
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
@llvm/pr-subscribers-backend-amdgpu @llvm/pr-subscribers-llvm-transforms Author: Usha Gupta (usha1830) ChangesThe current code adds This is a little conservative check and prevents cases if there is self-recursive callee or if the callee is in a recursive chain which does not include current caller. Example scenarios:
main and callee1 could have Similarly, main can have the This PR relaxes the requirement that all callees must have the Patch is 22.25 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/139943.diff 4 Files Affected:
diff --git a/llvm/lib/Transforms/IPO/FunctionAttrs.cpp b/llvm/lib/Transforms/IPO/FunctionAttrs.cpp
index f918d7e059b63..293699a1639c6 100644
--- a/llvm/lib/Transforms/IPO/FunctionAttrs.cpp
+++ b/llvm/lib/Transforms/IPO/FunctionAttrs.cpp
@@ -2059,8 +2059,49 @@ static void inferAttrsFromFunctionBodies(const SCCNodeSet &SCCNodes,
AI.run(SCCNodes, Changed);
}
+/// Returns true if N or any function it (transitively) calls makes a call
+/// to an unknown external function: either an indirect call or a declaration
+/// without the NoCallback attribute.
+static bool callsUnknownExternal(LazyCallGraph::Node *N) {
+ std::deque<LazyCallGraph::Node *> Worklist;
+ DenseSet<LazyCallGraph::Node *> Visited;
+ Worklist.push_back(N);
+ Visited.insert(N);
+
+ while (!Worklist.empty()) {
+ auto *Cur = Worklist.front();
+ Worklist.pop_front();
+
+ Function &F = Cur->getFunction();
+ for (auto &BB : F) {
+ for (auto &I : BB.instructionsWithoutDebug()) {
+ if (auto *CB = dyn_cast<CallBase>(&I)) {
+ const Function *Callee = CB->getCalledFunction();
+ // Indirect call, treat as unknown external function.
+ if (!Callee)
+ return true;
+ // Extern declaration without NoCallback attribute
+ if (Callee->isDeclaration() &&
+ !Callee->hasFnAttribute(Attribute::NoCallback))
+ return true;
+ }
+ }
+ }
+
+ // Enqueue all direct call-edge successors for further scanning
+ for (auto &Edge : Cur->populate().calls()) {
+ LazyCallGraph::Node *Succ = &Edge.getNode();
+ if (Visited.insert(Succ).second)
+ Worklist.push_back(Succ);
+ }
+ }
+
+ return false;
+}
+
static void addNoRecurseAttrs(const SCCNodeSet &SCCNodes,
- SmallSet<Function *, 8> &Changed) {
+ SmallSet<Function *, 8> &Changed,
+ LazyCallGraph &CG) {
// Try and identify functions that do not recurse.
// If the SCC contains multiple nodes we know for sure there is recursion.
@@ -2071,24 +2112,35 @@ static void addNoRecurseAttrs(const SCCNodeSet &SCCNodes,
if (!F || !F->hasExactDefinition() || F->doesNotRecurse())
return;
- // If all of the calls in F are identifiable and are to norecurse functions, F
- // is norecurse. This check also detects self-recursion as F is not currently
- // marked norecurse, so any called from F to F will not be marked norecurse.
- for (auto &BB : *F)
- for (auto &I : BB.instructionsWithoutDebug())
+ // If all of the calls in F are identifiable and can be proven to not
+ // callback F, F is norecurse. This check also detects self-recursion
+ // as F is not currently marked norecurse, so any call from F to F
+ // will not be marked norecurse.
+ for (auto &BB : *F) {
+ for (auto &I : BB.instructionsWithoutDebug()) {
if (auto *CB = dyn_cast<CallBase>(&I)) {
Function *Callee = CB->getCalledFunction();
- if (!Callee || Callee == F ||
- (!Callee->doesNotRecurse() &&
- !(Callee->isDeclaration() &&
- Callee->hasFnAttribute(Attribute::NoCallback))))
- // Function calls a potentially recursive function.
+
+ if (!Callee || Callee == F)
+ return;
+
+ // External call with NoCallback attribute.
+ if (Callee->isDeclaration()) {
+ if (Callee->hasFnAttribute(Attribute::NoCallback))
+ continue;
return;
+ }
+
+ if (auto *CNode = CG.lookup(*Callee))
+ if (callsUnknownExternal(CNode))
+ return;
}
+ }
+ }
- // Every call was to a non-recursive function other than this function, and
- // we have no indirect recursion as the SCC size is one. This function cannot
- // recurse.
+ // Every call was either to an external function guaranteed to not make a
+ // call to this function or a direct call to internal function and we have no
+ // indirect recursion as the SCC size is one. This function cannot recurse.
F->setDoesNotRecurse();
++NumNoRecurse;
Changed.insert(F);
@@ -2248,7 +2300,7 @@ static SCCNodesResult createSCCNodeSet(ArrayRef<Function *> Functions) {
template <typename AARGetterT>
static SmallSet<Function *, 8>
deriveAttrsInPostOrder(ArrayRef<Function *> Functions, AARGetterT &&AARGetter,
- bool ArgAttrsOnly) {
+ bool ArgAttrsOnly, LazyCallGraph &CG) {
SCCNodesResult Nodes = createSCCNodeSet(Functions);
// Bail if the SCC only contains optnone functions.
@@ -2279,7 +2331,7 @@ deriveAttrsInPostOrder(ArrayRef<Function *> Functions, AARGetterT &&AARGetter,
addNoAliasAttrs(Nodes.SCCNodes, Changed);
addNonNullAttrs(Nodes.SCCNodes, Changed);
inferAttrsFromFunctionBodies(Nodes.SCCNodes, Changed);
- addNoRecurseAttrs(Nodes.SCCNodes, Changed);
+ addNoRecurseAttrs(Nodes.SCCNodes, Changed, CG);
}
// Finally, infer the maximal set of attributes from the ones we've inferred
@@ -2323,7 +2375,7 @@ PreservedAnalyses PostOrderFunctionAttrsPass::run(LazyCallGraph::SCC &C,
}
auto ChangedFunctions =
- deriveAttrsInPostOrder(Functions, AARGetter, ArgAttrsOnly);
+ deriveAttrsInPostOrder(Functions, AARGetter, ArgAttrsOnly, CG);
if (ChangedFunctions.empty())
return PreservedAnalyses::all();
diff --git a/llvm/test/Transforms/FunctionAttrs/norecurse_multiSCC_indirect_recursion.ll b/llvm/test/Transforms/FunctionAttrs/norecurse_multiSCC_indirect_recursion.ll
new file mode 100644
index 0000000000000..c0af16b069bde
--- /dev/null
+++ b/llvm/test/Transforms/FunctionAttrs/norecurse_multiSCC_indirect_recursion.ll
@@ -0,0 +1,159 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-attributes --version 5
+; RUN: opt < %s -passes=function-attrs -S | FileCheck %s
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128-Fn32"
+target triple = "aarch64-unknown-linux-gnu"
+
+; This test includes a call graph with multiple SCCs. The purpose of this is
+; to check that norecurse is not added when a function is part of non-singular
+; SCC.
+; There are three different SCCs in this test:
+; SCC#1: main, foo, bar, foo1, bar1
+; SCC#2: bar2, bar3, bar4
+; SCC#3: baz, fun
+; None of these functions should be marked as norecurse
+
+; Function Attrs: nofree noinline nosync nounwind memory(none) uwtable
+define dso_local void @bar1() local_unnamed_addr #0 {
+; CHECK: Function Attrs: nofree noinline nosync nounwind memory(none) uwtable
+; CHECK-LABEL: define dso_local void @bar1(
+; CHECK-SAME: ) local_unnamed_addr #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[CALL:%.*]] = tail call i32 @main()
+; CHECK-NEXT: ret void
+;
+entry:
+ %call = tail call i32 @main()
+ ret void
+}
+
+; Function Attrs: nofree noinline nosync nounwind memory(none) uwtable
+define dso_local noundef i32 @main() local_unnamed_addr #0 {
+; CHECK: Function Attrs: nofree noinline nosync nounwind memory(none) uwtable
+; CHECK-LABEL: define dso_local noundef i32 @main(
+; CHECK-SAME: ) local_unnamed_addr #[[ATTR0]] {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: tail call void @foo()
+; CHECK-NEXT: tail call void @bar2()
+; CHECK-NEXT: tail call void @baz()
+; CHECK-NEXT: ret i32 0
+;
+entry:
+ tail call void @foo()
+ tail call void @bar2()
+ tail call void @baz()
+ ret i32 0
+}
+
+; Function Attrs: nofree noinline nosync nounwind memory(none) uwtable
+define dso_local void @foo1() local_unnamed_addr #0 {
+; CHECK: Function Attrs: nofree noinline nosync nounwind memory(none) uwtable
+; CHECK-LABEL: define dso_local void @foo1(
+; CHECK-SAME: ) local_unnamed_addr #[[ATTR0]] {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: tail call void @bar1()
+; CHECK-NEXT: ret void
+;
+entry:
+ tail call void @bar1()
+ ret void
+}
+
+; Function Attrs: nofree noinline nosync nounwind memory(none) uwtable
+define dso_local void @bar() local_unnamed_addr #0 {
+; CHECK: Function Attrs: nofree noinline nosync nounwind memory(none) uwtable
+; CHECK-LABEL: define dso_local void @bar(
+; CHECK-SAME: ) local_unnamed_addr #[[ATTR0]] {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: tail call void @foo1()
+; CHECK-NEXT: ret void
+;
+entry:
+ tail call void @foo1()
+ ret void
+}
+
+; Function Attrs: nofree noinline nosync nounwind memory(none) uwtable
+define dso_local void @foo() local_unnamed_addr #0 {
+; CHECK: Function Attrs: nofree noinline nosync nounwind memory(none) uwtable
+; CHECK-LABEL: define dso_local void @foo(
+; CHECK-SAME: ) local_unnamed_addr #[[ATTR0]] {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: tail call void @bar()
+; CHECK-NEXT: ret void
+;
+entry:
+ tail call void @bar()
+ ret void
+}
+
+; Function Attrs: nofree noinline nosync nounwind memory(none) uwtable
+define dso_local void @bar4() local_unnamed_addr #0 {
+; CHECK: Function Attrs: nofree noinline nosync nounwind memory(none) uwtable
+; CHECK-LABEL: define dso_local void @bar4(
+; CHECK-SAME: ) local_unnamed_addr #[[ATTR0]] {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: tail call void @bar2()
+; CHECK-NEXT: ret void
+;
+entry:
+ tail call void @bar2()
+ ret void
+}
+
+; Function Attrs: nofree noinline nosync nounwind memory(none) uwtable
+define dso_local void @bar2() local_unnamed_addr #0 {
+; CHECK: Function Attrs: nofree noinline nosync nounwind memory(none) uwtable
+; CHECK-LABEL: define dso_local void @bar2(
+; CHECK-SAME: ) local_unnamed_addr #[[ATTR0]] {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: tail call void @bar3()
+; CHECK-NEXT: ret void
+;
+entry:
+ tail call void @bar3()
+ ret void
+}
+
+; Function Attrs: nofree noinline nosync nounwind memory(none) uwtable
+define dso_local void @bar3() local_unnamed_addr #0 {
+; CHECK: Function Attrs: nofree noinline nosync nounwind memory(none) uwtable
+; CHECK-LABEL: define dso_local void @bar3(
+; CHECK-SAME: ) local_unnamed_addr #[[ATTR0]] {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: tail call void @bar4()
+; CHECK-NEXT: ret void
+;
+entry:
+ tail call void @bar4()
+ ret void
+}
+
+; Function Attrs: nofree noinline nosync nounwind memory(none) uwtable
+define dso_local void @fun() local_unnamed_addr #0 {
+; CHECK: Function Attrs: nofree noinline nosync nounwind memory(none) uwtable
+; CHECK-LABEL: define dso_local void @fun(
+; CHECK-SAME: ) local_unnamed_addr #[[ATTR0]] {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: tail call void @baz()
+; CHECK-NEXT: ret void
+;
+entry:
+ tail call void @baz()
+ ret void
+}
+
+; Function Attrs: nofree noinline nosync nounwind memory(none) uwtable
+define dso_local void @baz() local_unnamed_addr #0 {
+; CHECK: Function Attrs: nofree noinline nosync nounwind memory(none) uwtable
+; CHECK-LABEL: define dso_local void @baz(
+; CHECK-SAME: ) local_unnamed_addr #[[ATTR0]] {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: tail call void @fun()
+; CHECK-NEXT: ret void
+;
+entry:
+ tail call void @fun()
+ ret void
+}
+
+attributes #0 = { nofree noinline nosync nounwind memory(none) uwtable "frame-pointer"="non-leaf" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="generic" "target-features"="+fp-armv8,+neon,+outline-atomics,+v8a,-fmv" }
diff --git a/llvm/test/Transforms/FunctionAttrs/norecurse_multiSCC_indirect_recursion1.ll b/llvm/test/Transforms/FunctionAttrs/norecurse_multiSCC_indirect_recursion1.ll
new file mode 100644
index 0000000000000..60bbaab2a7d66
--- /dev/null
+++ b/llvm/test/Transforms/FunctionAttrs/norecurse_multiSCC_indirect_recursion1.ll
@@ -0,0 +1,102 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-attributes --version 5
+; RUN: opt < %s -passes=function-attrs -S | FileCheck %s
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128-Fn32"
+target triple = "aarch64-unknown-linux-gnu"
+
+; This test includes a call graph with multiple SCCs. The purpose of this is
+; to check that norecurse is added to a function which calls a function which
+; is indirectly recursive but is not part of the recursive chain.
+; There are two SCCs in this test:
+; SCC#1: bar2, bar3, bar4
+; SCC#2: baz, fun
+; main() calls bar2 and baz, both of which are part of some indirect recursive
+; chain. but does not call back main() and hence main() can be marked as
+; norecurse.
+
+; Function Attrs: nofree noinline nosync nounwind memory(none) uwtable
+define dso_local noundef i32 @main() local_unnamed_addr #0 {
+; CHECK: Function Attrs: nofree noinline norecurse nosync nounwind memory(none) uwtable
+; CHECK-LABEL: define dso_local noundef i32 @main(
+; CHECK-SAME: ) local_unnamed_addr #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: tail call void @bar2()
+; CHECK-NEXT: tail call void @baz()
+; CHECK-NEXT: ret i32 0
+;
+entry:
+ tail call void @bar2()
+ tail call void @baz()
+ ret i32 0
+}
+
+; Function Attrs: nofree noinline nosync nounwind memory(none) uwtable
+define dso_local void @bar4() local_unnamed_addr #0 {
+; CHECK: Function Attrs: nofree noinline nosync nounwind memory(none) uwtable
+; CHECK-LABEL: define dso_local void @bar4(
+; CHECK-SAME: ) local_unnamed_addr #[[ATTR1:[0-9]+]] {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: tail call void @bar2()
+; CHECK-NEXT: ret void
+;
+entry:
+ tail call void @bar2()
+ ret void
+}
+
+; Function Attrs: nofree noinline nosync nounwind memory(none) uwtable
+define dso_local void @bar2() local_unnamed_addr #0 {
+; CHECK: Function Attrs: nofree noinline nosync nounwind memory(none) uwtable
+; CHECK-LABEL: define dso_local void @bar2(
+; CHECK-SAME: ) local_unnamed_addr #[[ATTR1]] {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: tail call void @bar3()
+; CHECK-NEXT: ret void
+;
+entry:
+ tail call void @bar3()
+ ret void
+}
+
+; Function Attrs: nofree noinline nosync nounwind memory(none) uwtable
+define dso_local void @bar3() local_unnamed_addr #0 {
+; CHECK: Function Attrs: nofree noinline nosync nounwind memory(none) uwtable
+; CHECK-LABEL: define dso_local void @bar3(
+; CHECK-SAME: ) local_unnamed_addr #[[ATTR1]] {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: tail call void @bar4()
+; CHECK-NEXT: ret void
+;
+entry:
+ tail call void @bar4()
+ ret void
+}
+
+; Function Attrs: nofree noinline nosync nounwind memory(none) uwtable
+define dso_local void @fun() local_unnamed_addr #0 {
+; CHECK: Function Attrs: nofree noinline nosync nounwind memory(none) uwtable
+; CHECK-LABEL: define dso_local void @fun(
+; CHECK-SAME: ) local_unnamed_addr #[[ATTR1]] {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: tail call void @baz()
+; CHECK-NEXT: ret void
+;
+entry:
+ tail call void @baz()
+ ret void
+}
+
+; Function Attrs: nofree noinline nosync nounwind memory(none) uwtable
+define dso_local void @baz() local_unnamed_addr #0 {
+; CHECK: Function Attrs: nofree noinline nosync nounwind memory(none) uwtable
+; CHECK-LABEL: define dso_local void @baz(
+; CHECK-SAME: ) local_unnamed_addr #[[ATTR1]] {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: tail call void @fun()
+; CHECK-NEXT: ret void
+;
+entry:
+ tail call void @fun()
+ ret void
+}
+
+attributes #0 = { nofree noinline nosync nounwind memory(none) uwtable "frame-pointer"="non-leaf" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="generic" "target-features"="+fp-armv8,+neon,+outline-atomics,+v8a,-fmv" }
diff --git a/llvm/test/Transforms/FunctionAttrs/norecurse_self_recursive_callee.ll b/llvm/test/Transforms/FunctionAttrs/norecurse_self_recursive_callee.ll
new file mode 100644
index 0000000000000..9b87c4e4ad76c
--- /dev/null
+++ b/llvm/test/Transforms/FunctionAttrs/norecurse_self_recursive_callee.ll
@@ -0,0 +1,173 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-attributes --version 5
+; RUN: opt < %s -passes=function-attrs -S | FileCheck %s
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128-Fn32"
+target triple = "aarch64-unknown-linux-gnu"
+
+; This test includes a call graph with a self recursive function.
+; The purpose of this is to check that norecurse is added to functions
+; which have a self-recursive function in the call-chain.
+; The call-chain in this test is as follows
+; main -> bob -> callee1 -> callee2 -> callee3 -> callee4 -> callee5
+; where callee5 is self recursive.
+
+@x = dso_local global i32 4, align 4
+@y = dso_local global i32 2, align 4
+
+; Function Attrs: nofree noinline norecurse nounwind memory(readwrite, argmem: none) uwtable
+define dso_local void @callee6() local_unnamed_addr #0 {
+; CHECK: Function Attrs: nofree noinline norecurse nounwind memory(readwrite, argmem: none) uwtable
+; CHECK-LABEL: define dso_local void @callee6(
+; CHECK-SAME: ) local_unnamed_addr #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[TMP0:%.*]] = load volatile i32, ptr @y, align 4, !tbaa [[TBAA8:![0-9]+]]
+; CHECK-NEXT: [[INC:%.*]] = add nsw i32 [[TMP0]], 1
+; CHECK-NEXT: store volatile i32 [[INC]], ptr @y, align 4, !tbaa [[TBAA8]]
+; CHECK-NEXT: ret void
+;
+entry:
+ %0 = load volatile i32, ptr @y, align 4, !tbaa !8
+ %inc = add nsw i32 %0, 1
+ store volatile i32 %inc, ptr @y, align 4, !tbaa !8
+ ret void
+}
+
+; Function Attrs: nofree noinline nounwind uwtable
+define dso_local void @callee5(i32 noundef %x) local_unnamed_addr #1 {
+; CHECK: Function Attrs: nofree noinline nounwind uwtable
+; CHECK-LABEL: define dso_local void @callee5(
+; CHECK-SAME: i32 noundef [[X:%.*]]) local_unnamed_addr #[[ATTR1:[0-9]+]] {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i32 [[X]], 0
+; CHECK-NEXT: br i1 [[CMP]], label %[[IF_THEN:.*]], label %[[IF_END:.*]]
+; CHECK: [[IF_THEN]]:
+; CHECK-NEXT: tail call void @callee5(i32 noundef [[X]])
+; CHECK-NEXT: br label %[[IF_END]]
+; CHECK: [[IF_END]]:
+; CHECK-NEXT: tail call void @callee6()
+; CHECK-NEXT: ret void
+;
+entry:
+ %cmp = icmp sgt i32 %x, 0
+ br i1 %cmp, label %if.then, label %if.end
+
+if.then: ; preds = %entry
+ tail call void @callee5(i32 noundef %x)
+ br label %if.end
+
+if.end: ; preds = %if.then, %entry
+ tail call void @callee6()
+ ret void
+}
+
+; Function Attrs: nofree noinline nounwind uwtable
+define dso_local void @callee4() local_unnamed_addr #1 {
+; CHECK: Function Attrs: nofree noinline norecurse nounwind uwtable
+; CHECK-LABEL: define dso_local void @callee4(
+; CHECK-SAME: ) local_unnamed_addr #[[ATTR2:[0-9]+]] {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[TMP0:%.*]] = load volatile i32, ptr @x, align 4, !tbaa [[TBAA8]]
+; CHECK-NEXT: tail call void @callee5(i32 noundef [[TMP0]])
+; CHECK-NEXT: ret void
+;
+entry:
+ %0 = load volatile i32, ptr @x, align 4, !tbaa !8
+ tail call void @callee5(i32 noundef %0)
+ ret void
+}
+
+; Function Attrs: nofree noinline nounwind uwtable
+define dso_local void @callee3() local_unnamed_addr #1 {
+; CHECK: Function Attrs: nofree noinline norecurse nounwind uwtable
+; CHECK-LABEL: define dso_local void @callee3(
+; CHECK-SAME: ) local_unnamed_addr #[[ATTR2]] {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: tail call void @callee4()
+; CHECK-NEXT: ret void
+;
+entry:
+ tail call void @callee4()
+ ret void
+}
+
+; Function Attrs: nofree noinline nounwind uwtable
+define dso_local void @callee2() local_unnamed_addr #1 {
+; CHECK: Function Attrs: nofree noinline norecurse nounwind uwtable
+; CHECK-LABEL: define dso_local void @callee2(
+; CHECK-SAME: ) local_unnamed_addr #[[ATTR2]] {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: tail call void @callee3()
+; CHECK-NEXT: ret void
+;
+entry:
+ tail call void @callee3()
+ ret void
+}
+
+; Function Attrs: nofree noinline nounwind uwtable
+define dso_local void @callee1() local_unnamed_addr #1 {
+; CHECK: Function Attrs: nofree noinline norecurse nounwind uwtable
+; CHECK-LABEL: define dso_local void @callee1(
+; CHECK-SAME: ) local_unnamed_addr #[[ATTR2]] {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: tail call void @callee2()
+; CHECK-NEXT: ret void
+;
+entry:
+ tail call void @callee2()
+ ret void
+}
+
+; Function Attrs: nofree noinline nounwind uwtable
+define dso_local void @bob() local...
[truncated]
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I don't think FunctionAttrs should be performing inference that inspects more than the current SCC.
Hi @nikic, Thanks for reviewing! Are you referring to the check added to detect unknown external calls in a callee’s call graph? This check is now necessary because we are no longer checking norecurse on each callee. Without this, we might incorrectly infer norecurse for a function that calls another function with unknown external call. For example: An alternative approach could be to ensure that the function F has only direct call users. If there are any non-call uses of F (e.g., being stored to a variable or passed as a function pointer), we should avoid inferring norecurse. This could further uncover more opportunities for inferring norecurse attribute. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Reasoning about uses of F is only going to work if the function has local linkage. hasAddressTaken() does not take into account potential uses in other modules.
I was initially thinking of using ModuleSummaryIndex Analysis here to make sure we do this check in post-link LTO stage. We still prevent the addition of norecurse in most cases as earlier, as we are not adding norecurse solely on the basis of this check. Thinking more about this, I can think of one scenario where things might go wrong here. I will look further on this. Thanks again! |
02b506e
to
fa86454
Compare
I've updated the condition checks for callees to better handle unknown external calls. Specifically, to account for the possibility that an external function may call back into the current function, the logic now checks whether the address of any function in the program is taken. If so, and if there are external callees, we conservatively avoid adding the With this approach, we can also infer This check is only applied during the LTO post-link phase, where we have full visibility into all modules. This ensures that any remaining external calls truly originate from outside the program (e.g., libraries), not other LTO modules. Please let me know your thoughts on this approach. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The new approach seems reasonable to me. If there are no addresses taken of any function in the entire module, and the only external calls are to well-known library functions it seems safe to mark the function as norecurse
. But it's probably worth splitting this PR up into two parts. Thanks!
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think it might be worth pulling the LTO-time changes into a separate PR to keep things simple. That way it also makes to easier to see which test changes are caused by which code change.
eb8193b
to
8df2493
Compare
Thanks @david-arm for pointing out a case in which
Latest commit fixes this. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Thanks for the changes! I haven't reviewed all the tests yet, but it looks much better. Just left a few comments on things I spotted so far.
llvm/test/Transforms/FunctionAttrs/norecurse_multiSCC_indirect_recursion.ll
Outdated
Show resolved
Hide resolved
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM! Thanks for spending so much time addressing all the comments. The approach seems reasonable and safe to me, but please wait a day or so in case @nikic has any comments.
llvm/test/Transforms/FunctionAttrs/norecurse_libfunc_NoAddressTaken.ll
Outdated
Show resolved
Hide resolved
llvm/test/Transforms/FunctionAttrs/norecurse_libfunc_addressTaken.ll
Outdated
Show resolved
Hide resolved
llvm/test/Transforms/FunctionAttrs/norecurse_multiSCC_indirect_recursion.ll
Outdated
Show resolved
Hide resolved
I’d like to go ahead and merge this PR now. |
I have concerns with this patch, I'll try to review soon. |
llvm/test/Transforms/FunctionAttrs/norecurse_libfunc_address_taken.ll
Outdated
Show resolved
Hide resolved
I'm not really familiar with it, but it's worth noting that the LazyCallGraph structure represented the call graph as SCCs nested inside RefSCCs, where the latter also represents things like potential calls from outside the module or taking references to functions that may later get called. We usually do inference on SCCs, but I feel like it might make sense to infer norecurse on RefSCC. Would a trivial RefSCC (single element without self-edge) be norecurse without any further analysis? |
@nikic Thank you for taking the time to review again.
I am exploring this possibility. However, I think we would still need the indirect call (which are the potential edges in the RefSCC) to be resolved before we could confidently infer norecurse attribute. And we will still need more checks on the callees for external calls/libcalls etc. |
or is part of a recursive call chain which does not include the caller.
…ould be called back in
…ction callls if none of the user functions have had there address taken
…e are no external references
… LTO to avoid incorrect inference pre-LTO
54598c2
to
dd45387
Compare
✅ With the latest revision this PR passed the C/C++ code formatter. |
dd45387
to
1a626a6
Compare
@nikic @david-arm The new pass iterates over RefSCCs instead of just SCCs. This is expected to lead to faster inference for modules with multiple SCCs that have indirect calls between them |
…LTO (#158608) This PR, which supersedes #139943, extends the scenarios where the 'norecurse' attribute can be inferred. Currently, the 'norecurse' attribute is only inferred if all called functions also have this attribute. This change introduces a new pass in the LTO pipeline, run after Whole Program Devirtualization, to broaden the inference criteria. The new pass inspects all functions in the module and sets a flag if any functions are external or have their addresses taken (while ignoring those already marked norecurse). This flag is then used with the existing conditions to enable inference in more cases. This enhancement allows 'norecurse' to be applied in situations where a function calls a recursive function, but is not part of the same recursion chain. For example, foo can now be marked 'norecurse' in the following scenarios: `foo -> callee1 -> callee2 -> callee2` In this case, foo and callee1 can both be marked 'norecurse' because they're not part of the callee2 recursion. Similarly, foo can be marked 'norecurse' here: `foo -> callee1 -> callee2 -> callee1` Here, foo is not part of the callee1 -> callee2 -> callee1 recursion chain, so it can be marked 'norecurse'.
Closing this as the superseding PR #158608 is merged. |
…g postlink LTO (#158608) This PR, which supersedes llvm/llvm-project#139943, extends the scenarios where the 'norecurse' attribute can be inferred. Currently, the 'norecurse' attribute is only inferred if all called functions also have this attribute. This change introduces a new pass in the LTO pipeline, run after Whole Program Devirtualization, to broaden the inference criteria. The new pass inspects all functions in the module and sets a flag if any functions are external or have their addresses taken (while ignoring those already marked norecurse). This flag is then used with the existing conditions to enable inference in more cases. This enhancement allows 'norecurse' to be applied in situations where a function calls a recursive function, but is not part of the same recursion chain. For example, foo can now be marked 'norecurse' in the following scenarios: `foo -> callee1 -> callee2 -> callee2` In this case, foo and callee1 can both be marked 'norecurse' because they're not part of the callee2 recursion. Similarly, foo can be marked 'norecurse' here: `foo -> callee1 -> callee2 -> callee1` Here, foo is not part of the callee1 -> callee2 -> callee1 recursion chain, so it can be marked 'norecurse'.
The current code adds
norecurse
attribute to a function only if all of its callee functions havenorecurse
attribute.This is a little conservative check and prevents cases if there is self-recursive callee or if the callee is in a recursive chain which does not include current caller.
Example scenarios:
foo -> callee1 -> callee2 -> callee2
foo and callee1 could have
norecurse
in this case given the other constraints are satisfied.Similarly, foo can have the
norecurse
attribute in the following case.foo -> callee1 -> callee2 -> callee1
This PR relaxes the requirement that all callees must have the
norecurse
attribute.An additional check is added which enables cases like above to be enabled in post-link LTO stage.