Skip to content

Commit 53d477b

Browse files
committed
Apply optimizations based on linkage and function address only during LTO to avoid incorrect inference pre-LTO
1 parent 0a422d9 commit 53d477b

File tree

4 files changed

+130
-137
lines changed

4 files changed

+130
-137
lines changed

llvm/lib/Transforms/IPO/FunctionAttrs.cpp

Lines changed: 21 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -2094,18 +2094,16 @@ static void addNoRecurseAttrs(const SCCNodeSet &SCCNodes,
20942094
if (Callee->doesNotRecurse())
20952095
continue;
20962096

2097-
if (Callee->isDeclaration()) {
2098-
if (Callee->hasFnAttribute(Attribute::NoCallback) ||
2099-
NoFunctionsAddressIsTaken)
2100-
continue;
2101-
return;
2102-
} else if (F->hasAddressTaken() || !F->hasLocalLinkage()) {
2103-
// Control reaches here only for callees which are defined in this
2104-
// module and do not satisfy conditions for norecurse attribute.
2105-
// In such a case, if function F has external linkage or address
2106-
// taken, conservatively avoid adding norecurse.
2107-
return;
2108-
}
2097+
// If there are no functions with external linkage and none of the
2098+
// functions' address is taken, it ensures that this Callee does not
2099+
// have any path leading back to the Caller F.
2100+
// The 'NoFunctionsAddressIsTaken' flag is only set during post-link
2101+
// LTO phase after examining all available function definitions.
2102+
if (NoFunctionsAddressIsTaken ||
2103+
(Callee->isDeclaration() &&
2104+
Callee->hasFnAttribute(Attribute::NoCallback)))
2105+
continue;
2106+
return;
21092107
}
21102108
}
21112109
}
@@ -2330,10 +2328,11 @@ PreservedAnalyses PostOrderFunctionAttrsPass::run(LazyCallGraph::SCC &C,
23302328
}
23312329

23322330
bool NoFunctionsAddressIsTaken = false;
2333-
// Check if any function in the whole program has its address taken.
2331+
// Check if any function in the whole program has its address taken or has
2332+
// potentially external linkage.
23342333
// We use this information when inferring norecurse attribute: If there is
2335-
// no function whose address is taken, we conclude that any external function
2336-
// cannot callback into any user function.
2334+
// no function whose address is taken and all functions have internal
2335+
// linkage, there is no path for a callback to any user function.
23372336
if (IsLTOPostLink) {
23382337
bool AnyFunctionsAddressIsTaken = false;
23392338
// Get the parent Module of the Function
@@ -2344,13 +2343,20 @@ PreservedAnalyses PostOrderFunctionAttrsPass::run(LazyCallGraph::SCC &C,
23442343
if (F.isDeclaration())
23452344
continue;
23462345

2346+
// If the function is already marked as norecurse, this should not block
2347+
// norecurse inference even though it may have external linkage.
2348+
// For example: main() in C++.
2349+
if (F.doesNotRecurse())
2350+
continue;
2351+
23472352
if (!F.hasLocalLinkage() || F.hasAddressTaken()) {
23482353
AnyFunctionsAddressIsTaken = true;
23492354
break; // break if we found one
23502355
}
23512356
}
23522357
NoFunctionsAddressIsTaken = !AnyFunctionsAddressIsTaken;
23532358
}
2359+
23542360
auto ChangedFunctions = deriveAttrsInPostOrder(
23552361
Functions, AARGetter, ArgAttrsOnly, NoFunctionsAddressIsTaken);
23562362

llvm/test/Transforms/FunctionAttrs/norecurse_multiSCC_indirect_recursion.ll

Lines changed: 31 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,5 @@
11
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-attributes --version 5
2-
; RUN: opt < %s -passes=function-attrs -S | FileCheck %s
3-
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128-Fn32"
4-
target triple = "aarch64-unknown-linux-gnu"
2+
; RUN: opt < %s -passes="lto<O2>" -S | FileCheck %s
53

64
; This test includes a call graph with multiple SCCs. The purpose of this is
75
; to check that norecurse is not added when a function is part of non-singular
@@ -15,8 +13,8 @@ target triple = "aarch64-unknown-linux-gnu"
1513
; Function Attrs: nofree noinline nosync nounwind memory(none) uwtable
1614
define internal void @bar1() local_unnamed_addr #0 {
1715
; CHECK: Function Attrs: nofree noinline nosync nounwind memory(none) uwtable
18-
; CHECK-LABEL: define internal void @bar1(
19-
; CHECK-SAME: ) local_unnamed_addr #[[ATTR0:[0-9]+]] {
16+
; CHECK-LABEL: define internal fastcc void @bar1(
17+
; CHECK-SAME: ) unnamed_addr #[[ATTR0:[0-9]+]] {
2018
; CHECK-NEXT: [[ENTRY:.*:]]
2119
; CHECK-NEXT: [[CALL:%.*]] = tail call i32 @main()
2220
; CHECK-NEXT: ret void
@@ -32,9 +30,9 @@ define dso_local noundef i32 @main() local_unnamed_addr #0 {
3230
; CHECK-LABEL: define dso_local noundef i32 @main(
3331
; CHECK-SAME: ) local_unnamed_addr #[[ATTR0]] {
3432
; CHECK-NEXT: [[ENTRY:.*:]]
35-
; CHECK-NEXT: tail call void @foo()
36-
; CHECK-NEXT: tail call void @bar2()
37-
; CHECK-NEXT: tail call void @baz()
33+
; CHECK-NEXT: tail call fastcc void @foo()
34+
; CHECK-NEXT: tail call fastcc void @bar2()
35+
; CHECK-NEXT: tail call fastcc void @baz()
3836
; CHECK-NEXT: ret i32 0
3937
;
4038
entry:
@@ -47,10 +45,10 @@ entry:
4745
; Function Attrs: nofree noinline nosync nounwind memory(none) uwtable
4846
define internal void @foo1() local_unnamed_addr #0 {
4947
; CHECK: Function Attrs: nofree noinline nosync nounwind memory(none) uwtable
50-
; CHECK-LABEL: define internal void @foo1(
51-
; CHECK-SAME: ) local_unnamed_addr #[[ATTR0]] {
48+
; CHECK-LABEL: define internal fastcc void @foo1(
49+
; CHECK-SAME: ) unnamed_addr #[[ATTR0]] {
5250
; CHECK-NEXT: [[ENTRY:.*:]]
53-
; CHECK-NEXT: tail call void @bar1()
51+
; CHECK-NEXT: tail call fastcc void @bar1()
5452
; CHECK-NEXT: ret void
5553
;
5654
entry:
@@ -61,10 +59,10 @@ entry:
6159
; Function Attrs: nofree noinline nosync nounwind memory(none) uwtable
6260
define internal void @bar() local_unnamed_addr #0 {
6361
; CHECK: Function Attrs: nofree noinline nosync nounwind memory(none) uwtable
64-
; CHECK-LABEL: define internal void @bar(
65-
; CHECK-SAME: ) local_unnamed_addr #[[ATTR0]] {
62+
; CHECK-LABEL: define internal fastcc void @bar(
63+
; CHECK-SAME: ) unnamed_addr #[[ATTR0]] {
6664
; CHECK-NEXT: [[ENTRY:.*:]]
67-
; CHECK-NEXT: tail call void @foo1()
65+
; CHECK-NEXT: tail call fastcc void @foo1()
6866
; CHECK-NEXT: ret void
6967
;
7068
entry:
@@ -75,10 +73,10 @@ entry:
7573
; Function Attrs: nofree noinline nosync nounwind memory(none) uwtable
7674
define internal void @foo() local_unnamed_addr #0 {
7775
; CHECK: Function Attrs: nofree noinline nosync nounwind memory(none) uwtable
78-
; CHECK-LABEL: define internal void @foo(
79-
; CHECK-SAME: ) local_unnamed_addr #[[ATTR0]] {
76+
; CHECK-LABEL: define internal fastcc void @foo(
77+
; CHECK-SAME: ) unnamed_addr #[[ATTR0]] {
8078
; CHECK-NEXT: [[ENTRY:.*:]]
81-
; CHECK-NEXT: tail call void @bar()
79+
; CHECK-NEXT: tail call fastcc void @bar()
8280
; CHECK-NEXT: ret void
8381
;
8482
entry:
@@ -89,10 +87,10 @@ entry:
8987
; Function Attrs: nofree noinline nosync nounwind memory(none) uwtable
9088
define internal void @bar4() local_unnamed_addr #0 {
9189
; CHECK: Function Attrs: nofree noinline nosync nounwind memory(none) uwtable
92-
; CHECK-LABEL: define internal void @bar4(
93-
; CHECK-SAME: ) local_unnamed_addr #[[ATTR0]] {
90+
; CHECK-LABEL: define internal fastcc void @bar4(
91+
; CHECK-SAME: ) unnamed_addr #[[ATTR0]] {
9492
; CHECK-NEXT: [[ENTRY:.*:]]
95-
; CHECK-NEXT: tail call void @bar2()
93+
; CHECK-NEXT: tail call fastcc void @bar2()
9694
; CHECK-NEXT: ret void
9795
;
9896
entry:
@@ -103,10 +101,10 @@ entry:
103101
; Function Attrs: nofree noinline nosync nounwind memory(none) uwtable
104102
define internal void @bar2() local_unnamed_addr #0 {
105103
; CHECK: Function Attrs: nofree noinline nosync nounwind memory(none) uwtable
106-
; CHECK-LABEL: define internal void @bar2(
107-
; CHECK-SAME: ) local_unnamed_addr #[[ATTR0]] {
104+
; CHECK-LABEL: define internal fastcc void @bar2(
105+
; CHECK-SAME: ) unnamed_addr #[[ATTR0]] {
108106
; CHECK-NEXT: [[ENTRY:.*:]]
109-
; CHECK-NEXT: tail call void @bar3()
107+
; CHECK-NEXT: tail call fastcc void @bar3()
110108
; CHECK-NEXT: ret void
111109
;
112110
entry:
@@ -117,10 +115,10 @@ entry:
117115
; Function Attrs: nofree noinline nosync nounwind memory(none) uwtable
118116
define internal void @bar3() local_unnamed_addr #0 {
119117
; CHECK: Function Attrs: nofree noinline nosync nounwind memory(none) uwtable
120-
; CHECK-LABEL: define internal void @bar3(
121-
; CHECK-SAME: ) local_unnamed_addr #[[ATTR0]] {
118+
; CHECK-LABEL: define internal fastcc void @bar3(
119+
; CHECK-SAME: ) unnamed_addr #[[ATTR0]] {
122120
; CHECK-NEXT: [[ENTRY:.*:]]
123-
; CHECK-NEXT: tail call void @bar4()
121+
; CHECK-NEXT: tail call fastcc void @bar4()
124122
; CHECK-NEXT: ret void
125123
;
126124
entry:
@@ -131,10 +129,10 @@ entry:
131129
; Function Attrs: nofree noinline nosync nounwind memory(none) uwtable
132130
define internal void @fun() local_unnamed_addr #0 {
133131
; CHECK: Function Attrs: nofree noinline nosync nounwind memory(none) uwtable
134-
; CHECK-LABEL: define internal void @fun(
135-
; CHECK-SAME: ) local_unnamed_addr #[[ATTR0]] {
132+
; CHECK-LABEL: define internal fastcc void @fun(
133+
; CHECK-SAME: ) unnamed_addr #[[ATTR0]] {
136134
; CHECK-NEXT: [[ENTRY:.*:]]
137-
; CHECK-NEXT: tail call void @baz()
135+
; CHECK-NEXT: tail call fastcc void @baz()
138136
; CHECK-NEXT: ret void
139137
;
140138
entry:
@@ -145,15 +143,15 @@ entry:
145143
; Function Attrs: nofree noinline nosync nounwind memory(none) uwtable
146144
define internal void @baz() local_unnamed_addr #0 {
147145
; CHECK: Function Attrs: nofree noinline nosync nounwind memory(none) uwtable
148-
; CHECK-LABEL: define internal void @baz(
149-
; CHECK-SAME: ) local_unnamed_addr #[[ATTR0]] {
146+
; CHECK-LABEL: define internal fastcc void @baz(
147+
; CHECK-SAME: ) unnamed_addr #[[ATTR0]] {
150148
; CHECK-NEXT: [[ENTRY:.*:]]
151-
; CHECK-NEXT: tail call void @fun()
149+
; CHECK-NEXT: tail call fastcc void @fun()
152150
; CHECK-NEXT: ret void
153151
;
154152
entry:
155153
tail call void @fun()
156154
ret void
157155
}
158156

159-
attributes #0 = { nofree noinline nosync nounwind memory(none) uwtable "frame-pointer"="non-leaf" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="generic" "target-features"="+fp-armv8,+neon,+outline-atomics,+v8a,-fmv" }
157+
attributes #0 = { nofree noinline nosync nounwind memory(none) uwtable }
Lines changed: 41 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -1,27 +1,39 @@
11
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-attributes --version 5
2-
; RUN: opt < %s -passes=function-attrs -S | FileCheck %s
3-
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128-Fn32"
4-
target triple = "aarch64-unknown-linux-gnu"
2+
; RUN: opt < %s -passes="lto<O2>" -S | FileCheck %s
53

64
; This test includes a call graph with multiple SCCs. The purpose of this is
75
; to check that norecurse is added to a function which calls a function which
86
; is indirectly recursive but is not part of the recursive chain.
97
; There are two SCCs in this test:
108
; SCC#1: bar2, bar3, bar4
119
; SCC#2: baz, fun
12-
; main() calls bar2 and baz, both of which are part of some indirect recursive
13-
; chain. but does not call back main() and hence main() can be marked as
14-
; norecurse. But main() does not have internal linkage, hence we avoid adding
15-
; norecurse for main() as well.
10+
; f1() calls bar2 and baz, both of which are part of some indirect recursive
11+
; chain, but neither calls back f1(), and hence f1() can be marked as
12+
; norecurse.
1613

17-
; Function Attrs: nofree noinline nosync nounwind memory(none) uwtable
18-
define dso_local noundef i32 @main() local_unnamed_addr #0 {
19-
; CHECK: Function Attrs: nofree noinline nosync nounwind memory(none) uwtable
14+
; Function Attrs: nofree noinline norecurse nosync nounwind memory(none) uwtable
15+
define dso_local noundef i32 @main() local_unnamed_addr #1 {
16+
; CHECK: Function Attrs: nofree noinline norecurse nosync nounwind memory(none) uwtable
2017
; CHECK-LABEL: define dso_local noundef i32 @main(
2118
; CHECK-SAME: ) local_unnamed_addr #[[ATTR0:[0-9]+]] {
2219
; CHECK-NEXT: [[ENTRY:.*:]]
23-
; CHECK-NEXT: tail call void @bar2()
24-
; CHECK-NEXT: tail call void @baz()
20+
; CHECK-NEXT: [[TMP0:%.*]] = tail call fastcc i32 @f1()
21+
; CHECK-NEXT: ret i32 0
22+
;
23+
entry:
24+
tail call void @f1()
25+
ret i32 0
26+
}
27+
28+
29+
; Function Attrs: nofree noinline nosync nounwind memory(none) uwtable
30+
define internal i32 @f1() local_unnamed_addr #0 {
31+
; CHECK: Function Attrs: nofree noinline norecurse nosync nounwind memory(none) uwtable
32+
; CHECK-LABEL: define internal fastcc noundef i32 @f1(
33+
; CHECK-SAME: ) unnamed_addr #[[ATTR0]] {
34+
; CHECK-NEXT: [[ENTRY:.*:]]
35+
; CHECK-NEXT: tail call fastcc void @bar2()
36+
; CHECK-NEXT: tail call fastcc void @baz()
2537
; CHECK-NEXT: ret i32 0
2638
;
2739
entry:
@@ -33,10 +45,10 @@ entry:
3345
; Function Attrs: nofree noinline nosync nounwind memory(none) uwtable
3446
define internal void @bar4() local_unnamed_addr #0 {
3547
; CHECK: Function Attrs: nofree noinline nosync nounwind memory(none) uwtable
36-
; CHECK-LABEL: define internal void @bar4(
37-
; CHECK-SAME: ) local_unnamed_addr #[[ATTR0]] {
48+
; CHECK-LABEL: define internal fastcc void @bar4(
49+
; CHECK-SAME: ) unnamed_addr #[[ATTR1:[0-9]+]] {
3850
; CHECK-NEXT: [[ENTRY:.*:]]
39-
; CHECK-NEXT: tail call void @bar2()
51+
; CHECK-NEXT: tail call fastcc void @bar2()
4052
; CHECK-NEXT: ret void
4153
;
4254
entry:
@@ -47,10 +59,10 @@ entry:
4759
; Function Attrs: nofree noinline nosync nounwind memory(none) uwtable
4860
define internal void @bar2() local_unnamed_addr #0 {
4961
; CHECK: Function Attrs: nofree noinline nosync nounwind memory(none) uwtable
50-
; CHECK-LABEL: define internal void @bar2(
51-
; CHECK-SAME: ) local_unnamed_addr #[[ATTR0]] {
62+
; CHECK-LABEL: define internal fastcc void @bar2(
63+
; CHECK-SAME: ) unnamed_addr #[[ATTR1]] {
5264
; CHECK-NEXT: [[ENTRY:.*:]]
53-
; CHECK-NEXT: tail call void @bar3()
65+
; CHECK-NEXT: tail call fastcc void @bar3()
5466
; CHECK-NEXT: ret void
5567
;
5668
entry:
@@ -61,10 +73,10 @@ entry:
6173
; Function Attrs: nofree noinline nosync nounwind memory(none) uwtable
6274
define internal void @bar3() local_unnamed_addr #0 {
6375
; CHECK: Function Attrs: nofree noinline nosync nounwind memory(none) uwtable
64-
; CHECK-LABEL: define internal void @bar3(
65-
; CHECK-SAME: ) local_unnamed_addr #[[ATTR0]] {
76+
; CHECK-LABEL: define internal fastcc void @bar3(
77+
; CHECK-SAME: ) unnamed_addr #[[ATTR1]] {
6678
; CHECK-NEXT: [[ENTRY:.*:]]
67-
; CHECK-NEXT: tail call void @bar4()
79+
; CHECK-NEXT: tail call fastcc void @bar4()
6880
; CHECK-NEXT: ret void
6981
;
7082
entry:
@@ -75,10 +87,10 @@ entry:
7587
; Function Attrs: nofree noinline nosync nounwind memory(none) uwtable
7688
define internal void @fun() local_unnamed_addr #0 {
7789
; CHECK: Function Attrs: nofree noinline nosync nounwind memory(none) uwtable
78-
; CHECK-LABEL: define internal void @fun(
79-
; CHECK-SAME: ) local_unnamed_addr #[[ATTR0]] {
90+
; CHECK-LABEL: define internal fastcc void @fun(
91+
; CHECK-SAME: ) unnamed_addr #[[ATTR1]] {
8092
; CHECK-NEXT: [[ENTRY:.*:]]
81-
; CHECK-NEXT: tail call void @baz()
93+
; CHECK-NEXT: tail call fastcc void @baz()
8294
; CHECK-NEXT: ret void
8395
;
8496
entry:
@@ -89,15 +101,16 @@ entry:
89101
; Function Attrs: nofree noinline nosync nounwind memory(none) uwtable
90102
define internal void @baz() local_unnamed_addr #0 {
91103
; CHECK: Function Attrs: nofree noinline nosync nounwind memory(none) uwtable
92-
; CHECK-LABEL: define internal void @baz(
93-
; CHECK-SAME: ) local_unnamed_addr #[[ATTR0]] {
104+
; CHECK-LABEL: define internal fastcc void @baz(
105+
; CHECK-SAME: ) unnamed_addr #[[ATTR1]] {
94106
; CHECK-NEXT: [[ENTRY:.*:]]
95-
; CHECK-NEXT: tail call void @fun()
107+
; CHECK-NEXT: tail call fastcc void @fun()
96108
; CHECK-NEXT: ret void
97109
;
98110
entry:
99111
tail call void @fun()
100112
ret void
101113
}
102114

103-
attributes #0 = { nofree noinline nosync nounwind memory(none) uwtable "frame-pointer"="non-leaf" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="generic" "target-features"="+fp-armv8,+neon,+outline-atomics,+v8a,-fmv" }
115+
attributes #0 = { nofree noinline nosync nounwind memory(none) uwtable }
116+
attributes #1 = { nofree noinline norecurse nosync nounwind memory(none) uwtable }

0 commit comments

Comments
 (0)