Skip to content

Commit 3c57b1f

Browse files
committed
[FuncSpec] Consider literal constants of recursive functions
Enable specialization of literal constants by default for recursive functions.
1 parent d68083f commit 3c57b1f

File tree

4 files changed, with 120 additions and 32 deletions.

llvm/include/llvm/Transforms/IPO/FunctionSpecialization.h

Lines changed: 4 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -311,9 +311,11 @@ class FunctionSpecializer {
311311
/// @param FuncSize Cost of specializing a function.
312312
/// @param AllSpecs A vector to add potential specializations to.
313313
/// @param SM A map for a function's specialisation range
314+
/// @param ConsiderLiterals Whether to specialize on literal constants
314315
/// @return True, if any potential specializations were found
315316
bool findSpecializations(Function *F, unsigned FuncSize,
316-
SmallVectorImpl<Spec> &AllSpecs, SpecMap &SM);
317+
SmallVectorImpl<Spec> &AllSpecs, SpecMap &SM,
318+
bool ConsiderLiterals);
317319

318320
/// Compute the inlining bonus for replacing argument \p A with constant \p C.
319321
unsigned getInliningBonus(Argument *A, Constant *C);
@@ -328,7 +330,7 @@ class FunctionSpecializer {
328330

329331
/// Determine if it is possible to specialise the function for constant values
330332
/// of the formal parameter \p A.
331-
bool isArgumentInteresting(Argument *A);
333+
bool isArgumentInteresting(Argument *A, bool ConsiderLiterals);
332334

333335
/// Check if the value \p V (an actual argument) is a constant or can only
334336
/// have a constant value. Return that constant.

llvm/lib/Transforms/IPO/FunctionSpecialization.cpp

Lines changed: 10 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -663,7 +663,8 @@ bool FunctionSpecializer::run() {
663663
if (Inserted && Metrics.isRecursive)
664664
promoteConstantStackValues(&F);
665665

666-
if (!findSpecializations(&F, FuncSize, AllSpecs, SM)) {
666+
bool ConsiderLiterals = SpecializeLiteralConstant || Metrics.isRecursive;
667+
if (!findSpecializations(&F, FuncSize, AllSpecs, SM, ConsiderLiterals)) {
667668
LLVM_DEBUG(
668669
dbgs() << "FnSpecialization: No possible specializations found for "
669670
<< F.getName() << "\n");
@@ -803,7 +804,8 @@ static Function *cloneCandidateFunction(Function *F, unsigned NSpecs) {
803804

804805
bool FunctionSpecializer::findSpecializations(Function *F, unsigned FuncSize,
805806
SmallVectorImpl<Spec> &AllSpecs,
806-
SpecMap &SM) {
807+
SpecMap &SM,
808+
bool ConsiderLiterals) {
807809
// A mapping from a specialisation signature to the index of the respective
808810
// entry in the all specialisation array. Used to ensure uniqueness of
809811
// specialisations.
@@ -812,7 +814,7 @@ bool FunctionSpecializer::findSpecializations(Function *F, unsigned FuncSize,
812814
// Get a list of interesting arguments.
813815
SmallVector<Argument *> Args;
814816
for (Argument &Arg : F->args())
815-
if (isArgumentInteresting(&Arg))
817+
if (isArgumentInteresting(&Arg, ConsiderLiterals))
816818
Args.push_back(&Arg);
817819

818820
if (Args.empty())
@@ -1032,14 +1034,16 @@ unsigned FunctionSpecializer::getInliningBonus(Argument *A, Constant *C) {
10321034

10331035
/// Determine if it is possible to specialise the function for constant values
10341036
/// of the formal parameter \p A.
1035-
bool FunctionSpecializer::isArgumentInteresting(Argument *A) {
1037+
bool FunctionSpecializer::isArgumentInteresting(Argument *A,
1038+
bool ConsiderLiterals) {
10361039
// No point in specialization if the argument is unused.
10371040
if (A->user_empty())
10381041
return false;
10391042

10401043
Type *Ty = A->getType();
1041-
if (!Ty->isPointerTy() && (!SpecializeLiteralConstant ||
1042-
(!Ty->isIntegerTy() && !Ty->isFloatingPointTy() && !Ty->isStructTy())))
1044+
if (!Ty->isPointerTy() &&
1045+
(!ConsiderLiterals ||
1046+
(!Ty->isIntegerTy() && !Ty->isFloatingPointTy() && !Ty->isStructTy())))
10431047
return false;
10441048

10451049
// SCCP solver does not record an argument that will be constructed on

llvm/test/Transforms/FunctionSpecialization/function-specialization2.ll

Lines changed: 56 additions & 16 deletions
Original file line number | Diff line number | Diff line change
@@ -64,27 +64,27 @@ define i32 @main(ptr %0, i32 %1) {
6464
; CHECK-LABEL: define i32 @main(
6565
; CHECK-SAME: ptr [[TMP0:%.*]], i32 [[TMP1:%.*]]) {
6666
; CHECK-NEXT: call void @func.specialized.2(ptr [[TMP0]], i32 [[TMP1]])
67-
; CHECK-NEXT: call void @func.specialized.1(ptr [[TMP0]], i32 0)
67+
; CHECK-NEXT: call void @func.specialized.1(ptr [[TMP0]])
6868
; CHECK-NEXT: ret i32 0
6969
;
7070
;
7171
; CHECK-LABEL: define internal void @func.specialized.1(
72-
; CHECK-SAME: ptr [[TMP0:%.*]], i32 [[TMP1:%.*]]) {
72+
; CHECK-SAME: ptr [[TMP0:%.*]]) {
7373
; CHECK-NEXT: [[TMP2:%.*]] = alloca i32, align 4
74-
; CHECK-NEXT: store i32 [[TMP1]], ptr [[TMP2]], align 4
74+
; CHECK-NEXT: store i32 0, ptr [[TMP2]], align 4
7575
; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4
7676
; CHECK-NEXT: [[TMP4:%.*]] = icmp slt i32 [[TMP3]], 1
77-
; CHECK-NEXT: br i1 [[TMP4]], label %[[BB12:.*]], label %[[BB6:.*]]
78-
; CHECK: [[BB6]]:
77+
; CHECK-NEXT: br i1 [[TMP4]], label %[[BB11:.*]], label %[[BB5:.*]]
78+
; CHECK: [[BB5]]:
7979
; CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP2]], align 4
8080
; CHECK-NEXT: [[TMP7:%.*]] = sext i32 [[TMP6]] to i64
8181
; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i64 [[TMP7]]
8282
; CHECK-NEXT: call void @decrement(ptr [[TMP8]])
8383
; CHECK-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP2]], align 4
8484
; CHECK-NEXT: [[TMP10:%.*]] = add nsw i32 [[TMP9]], -1
85-
; CHECK-NEXT: call void @func.specialized.1(ptr [[TMP0]], i32 [[TMP10]])
86-
; CHECK-NEXT: br label %[[BB12]]
87-
; CHECK: [[BB12]]:
85+
; CHECK-NEXT: call void @func.specialized.3(ptr [[TMP0]], i32 [[TMP10]])
86+
; CHECK-NEXT: br label %[[BB11]]
87+
; CHECK: [[BB11]]:
8888
; CHECK-NEXT: ret void
8989
;
9090
;
@@ -108,6 +108,46 @@ define i32 @main(ptr %0, i32 %1) {
108108
; CHECK-NEXT: ret void
109109
;
110110
;
111+
; CHECK-LABEL: define internal void @func.specialized.3(
112+
; CHECK-SAME: ptr [[TMP0:%.*]], i32 [[TMP1:%.*]]) {
113+
; CHECK-NEXT: [[TMP3:%.*]] = alloca i32, align 4
114+
; CHECK-NEXT: store i32 [[TMP1]], ptr [[TMP3]], align 4
115+
; CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4
116+
; CHECK-NEXT: [[TMP5:%.*]] = icmp slt i32 [[TMP4]], 1
117+
; CHECK-NEXT: br i1 [[TMP5]], label %[[BB12:.*]], label %[[BB6:.*]]
118+
; CHECK: [[BB6]]:
119+
; CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP3]], align 4
120+
; CHECK-NEXT: [[TMP8:%.*]] = sext i32 [[TMP7]] to i64
121+
; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i64 [[TMP8]]
122+
; CHECK-NEXT: call void @decrement(ptr [[TMP9]])
123+
; CHECK-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP3]], align 4
124+
; CHECK-NEXT: [[TMP11:%.*]] = add nsw i32 [[TMP10]], -1
125+
; CHECK-NEXT: call void @func.specialized.3(ptr [[TMP0]], i32 [[TMP11]])
126+
; CHECK-NEXT: br label %[[BB12]]
127+
; CHECK: [[BB12]]:
128+
; CHECK-NEXT: ret void
129+
;
130+
;
131+
; ONE-ITER-LABEL: define internal void @func(
132+
; ONE-ITER-SAME: ptr [[TMP0:%.*]], i32 [[TMP1:%.*]], ptr nocapture [[TMP2:%.*]]) {
133+
; ONE-ITER-NEXT: [[TMP4:%.*]] = alloca i32, align 4
134+
; ONE-ITER-NEXT: store i32 [[TMP1]], ptr [[TMP4]], align 4
135+
; ONE-ITER-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4
136+
; ONE-ITER-NEXT: [[TMP6:%.*]] = icmp slt i32 [[TMP5]], 1
137+
; ONE-ITER-NEXT: br i1 [[TMP6]], label %[[BB13:.*]], label %[[BB7:.*]]
138+
; ONE-ITER: [[BB7]]:
139+
; ONE-ITER-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP4]], align 4
140+
; ONE-ITER-NEXT: [[TMP9:%.*]] = sext i32 [[TMP8]] to i64
141+
; ONE-ITER-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i64 [[TMP9]]
142+
; ONE-ITER-NEXT: call void [[TMP2]](ptr [[TMP10]])
143+
; ONE-ITER-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP4]], align 4
144+
; ONE-ITER-NEXT: [[TMP12:%.*]] = add nsw i32 [[TMP11]], -1
145+
; ONE-ITER-NEXT: call void @func(ptr [[TMP0]], i32 [[TMP12]], ptr [[TMP2]])
146+
; ONE-ITER-NEXT: br label %[[BB13]]
147+
; ONE-ITER: [[BB13]]:
148+
; ONE-ITER-NEXT: ret void
149+
;
150+
;
111151
; ONE-ITER-LABEL: define internal void @increment(
112152
; ONE-ITER-SAME: ptr nocapture [[TMP0:%.*]]) {
113153
; ONE-ITER-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP0]], align 4
@@ -127,27 +167,27 @@ define i32 @main(ptr %0, i32 %1) {
127167
; ONE-ITER-LABEL: define i32 @main(
128168
; ONE-ITER-SAME: ptr [[TMP0:%.*]], i32 [[TMP1:%.*]]) {
129169
; ONE-ITER-NEXT: call void @func.specialized.2(ptr [[TMP0]], i32 [[TMP1]])
130-
; ONE-ITER-NEXT: call void @func.specialized.1(ptr [[TMP0]], i32 0)
170+
; ONE-ITER-NEXT: call void @func.specialized.1(ptr [[TMP0]])
131171
; ONE-ITER-NEXT: ret i32 0
132172
;
133173
;
134174
; ONE-ITER-LABEL: define internal void @func.specialized.1(
135-
; ONE-ITER-SAME: ptr [[TMP0:%.*]], i32 [[TMP1:%.*]]) {
175+
; ONE-ITER-SAME: ptr [[TMP0:%.*]]) {
136176
; ONE-ITER-NEXT: [[TMP2:%.*]] = alloca i32, align 4
137-
; ONE-ITER-NEXT: store i32 [[TMP1]], ptr [[TMP2]], align 4
177+
; ONE-ITER-NEXT: store i32 0, ptr [[TMP2]], align 4
138178
; ONE-ITER-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4
139179
; ONE-ITER-NEXT: [[TMP4:%.*]] = icmp slt i32 [[TMP3]], 1
140-
; ONE-ITER-NEXT: br i1 [[TMP4]], label %[[BB12:.*]], label %[[BB6:.*]]
141-
; ONE-ITER: [[BB6]]:
180+
; ONE-ITER-NEXT: br i1 [[TMP4]], label %[[BB11:.*]], label %[[BB5:.*]]
181+
; ONE-ITER: [[BB5]]:
142182
; ONE-ITER-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP2]], align 4
143183
; ONE-ITER-NEXT: [[TMP7:%.*]] = sext i32 [[TMP6]] to i64
144184
; ONE-ITER-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i64 [[TMP7]]
145185
; ONE-ITER-NEXT: call void @decrement(ptr [[TMP8]])
146186
; ONE-ITER-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP2]], align 4
147187
; ONE-ITER-NEXT: [[TMP10:%.*]] = add nsw i32 [[TMP9]], -1
148-
; ONE-ITER-NEXT: call void @func.specialized.1(ptr [[TMP0]], i32 [[TMP10]])
149-
; ONE-ITER-NEXT: br label %[[BB12]]
150-
; ONE-ITER: [[BB12]]:
188+
; ONE-ITER-NEXT: call void @func(ptr [[TMP0]], i32 [[TMP10]], ptr @decrement)
189+
; ONE-ITER-NEXT: br label %[[BB11]]
190+
; ONE-ITER: [[BB11]]:
151191
; ONE-ITER-NEXT: ret void
152192
;
153193
;

llvm/test/Transforms/FunctionSpecialization/identical-specializations.ll

Lines changed: 50 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -51,7 +51,7 @@ entry:
5151
; CHECK-NEXT: [[ENTRY:.*:]]
5252
; CHECK-NEXT: br i1 [[FLAG]], label %[[PLUS:.*]], label %[[MINUS:.*]]
5353
; CHECK: [[PLUS]]:
54-
; CHECK-NEXT: [[CMP0:%.*]] = call i64 @compute.specialized.2(i64 [[X]], i64 [[Y]], ptr @plus, ptr @minus)
54+
; CHECK-NEXT: [[CMP0:%.*]] = call i64 @compute.specialized.1(i64 [[X]], i64 [[Y]], ptr @plus, ptr @minus)
5555
; CHECK-NEXT: br label %[[MERGE:.*]]
5656
; CHECK: [[MINUS]]:
5757
; CHECK-NEXT: [[CMP1:%.*]] = call i64 @compute.specialized.3(i64 [[X]], i64 [[Y]], ptr @minus, ptr @plus)
@@ -79,10 +79,10 @@ entry:
7979
; CHECK-LABEL: define internal i64 @compute.specialized.1(
8080
; CHECK-SAME: i64 [[X:%.*]], i64 [[Y:%.*]], ptr [[BINOP1:%.*]], ptr [[BINOP2:%.*]]) {
8181
; CHECK-NEXT: [[ENTRY:.*:]]
82-
; CHECK-NEXT: [[OP1:%.*]] = call i64 [[BINOP1]](i64 [[X]], i64 [[Y]])
8382
; CHECK-NEXT: [[OP0:%.*]] = call i64 @plus(i64 [[X]], i64 [[Y]])
84-
; CHECK-NEXT: [[OP2:%.*]] = call i64 @compute.specialized.1(i64 [[X]], i64 [[Y]], ptr [[BINOP1]], ptr @plus)
85-
; CHECK-NEXT: [[ADD0:%.*]] = add i64 [[OP1]], [[OP0]]
83+
; CHECK-NEXT: [[OP1:%.*]] = call i64 @minus(i64 [[X]], i64 [[Y]])
84+
; CHECK-NEXT: [[OP2:%.*]] = call i64 @compute.specialized.5(i64 [[X]], i64 [[Y]], ptr @plus, ptr @plus)
85+
; CHECK-NEXT: [[ADD0:%.*]] = add i64 [[OP0]], [[OP1]]
8686
; CHECK-NEXT: [[ADD1:%.*]] = add i64 [[ADD0]], [[OP2]]
8787
; CHECK-NEXT: [[DIV:%.*]] = sdiv i64 [[ADD1]], [[X]]
8888
; CHECK-NEXT: [[SUB:%.*]] = sub i64 [[DIV]], [[Y]]
@@ -93,13 +93,13 @@ entry:
9393
; CHECK-LABEL: define internal i64 @compute.specialized.2(
9494
; CHECK-SAME: i64 [[X:%.*]], i64 [[Y:%.*]], ptr [[BINOP1:%.*]], ptr [[BINOP2:%.*]]) {
9595
; CHECK-NEXT: [[ENTRY:.*:]]
96-
; CHECK-NEXT: [[OP0:%.*]] = call i64 @plus(i64 [[X]], i64 [[Y]])
97-
; CHECK-NEXT: [[OP1:%.*]] = call i64 @minus(i64 [[X]], i64 [[Y]])
98-
; CHECK-NEXT: [[OP2:%.*]] = call i64 @compute.specialized.1(i64 [[X]], i64 [[Y]], ptr @plus, ptr @plus)
96+
; CHECK-NEXT: [[OP0:%.*]] = call i64 @plus(i64 [[X]], i64 42)
97+
; CHECK-NEXT: [[OP1:%.*]] = call i64 @minus(i64 [[X]], i64 42)
98+
; CHECK-NEXT: [[OP2:%.*]] = call i64 @compute.specialized.4(i64 [[X]], i64 42, ptr @plus, ptr @plus)
9999
; CHECK-NEXT: [[ADD0:%.*]] = add i64 [[OP0]], [[OP1]]
100100
; CHECK-NEXT: [[ADD1:%.*]] = add i64 [[ADD0]], [[OP2]]
101101
; CHECK-NEXT: [[DIV:%.*]] = sdiv i64 [[ADD1]], [[X]]
102-
; CHECK-NEXT: [[SUB:%.*]] = sub i64 [[DIV]], [[Y]]
102+
; CHECK-NEXT: [[SUB:%.*]] = sub i64 [[DIV]], 42
103103
; CHECK-NEXT: [[MUL:%.*]] = mul i64 [[SUB]], 2
104104
; CHECK-NEXT: ret i64 [[MUL]]
105105
;
@@ -117,3 +117,45 @@ entry:
117117
; CHECK-NEXT: [[MUL:%.*]] = mul i64 [[SUB]], 2
118118
; CHECK-NEXT: ret i64 [[MUL]]
119119
;
120+
;
121+
; CHECK-LABEL: define internal i64 @compute.specialized.4(
122+
; CHECK-SAME: i64 [[X:%.*]], i64 [[Y:%.*]], ptr [[BINOP1:%.*]], ptr [[BINOP2:%.*]]) {
123+
; CHECK-NEXT: [[ENTRY:.*:]]
124+
; CHECK-NEXT: [[OP0:%.*]] = call i64 @plus(i64 [[X]], i64 42)
125+
; CHECK-NEXT: [[OP1:%.*]] = call i64 @plus(i64 [[X]], i64 42)
126+
; CHECK-NEXT: [[OP2:%.*]] = call i64 @compute.specialized.4(i64 [[X]], i64 42, ptr @plus, ptr @plus)
127+
; CHECK-NEXT: [[ADD0:%.*]] = add i64 [[OP0]], [[OP1]]
128+
; CHECK-NEXT: [[ADD1:%.*]] = add i64 [[ADD0]], [[OP2]]
129+
; CHECK-NEXT: [[DIV:%.*]] = sdiv i64 [[ADD1]], [[X]]
130+
; CHECK-NEXT: [[SUB:%.*]] = sub i64 [[DIV]], 42
131+
; CHECK-NEXT: [[MUL:%.*]] = mul i64 [[SUB]], 2
132+
; CHECK-NEXT: ret i64 [[MUL]]
133+
;
134+
;
135+
; CHECK-LABEL: define internal i64 @compute.specialized.5(
136+
; CHECK-SAME: i64 [[X:%.*]], i64 [[Y:%.*]], ptr [[BINOP1:%.*]], ptr [[BINOP2:%.*]]) {
137+
; CHECK-NEXT: [[ENTRY:.*:]]
138+
; CHECK-NEXT: [[OP0:%.*]] = call i64 @plus(i64 [[X]], i64 [[Y]])
139+
; CHECK-NEXT: [[OP1:%.*]] = call i64 @plus(i64 [[X]], i64 [[Y]])
140+
; CHECK-NEXT: [[OP2:%.*]] = call i64 @compute.specialized.5(i64 [[X]], i64 [[Y]], ptr @plus, ptr @plus)
141+
; CHECK-NEXT: [[ADD0:%.*]] = add i64 [[OP0]], [[OP1]]
142+
; CHECK-NEXT: [[ADD1:%.*]] = add i64 [[ADD0]], [[OP2]]
143+
; CHECK-NEXT: [[DIV:%.*]] = sdiv i64 [[ADD1]], [[X]]
144+
; CHECK-NEXT: [[SUB:%.*]] = sub i64 [[DIV]], [[Y]]
145+
; CHECK-NEXT: [[MUL:%.*]] = mul i64 [[SUB]], 2
146+
; CHECK-NEXT: ret i64 [[MUL]]
147+
;
148+
;
149+
; CHECK-LABEL: define internal i64 @compute.specialized.6(
150+
; CHECK-SAME: i64 [[X:%.*]], i64 [[Y:%.*]], ptr [[BINOP1:%.*]], ptr [[BINOP2:%.*]]) {
151+
; CHECK-NEXT: [[ENTRY:.*:]]
152+
; CHECK-NEXT: [[OP0:%.*]] = call i64 [[BINOP1]](i64 [[X]], i64 [[Y]])
153+
; CHECK-NEXT: [[OP1:%.*]] = call i64 @plus(i64 [[X]], i64 [[Y]])
154+
; CHECK-NEXT: [[OP2:%.*]] = call i64 @compute.specialized.6(i64 [[X]], i64 [[Y]], ptr [[BINOP1]], ptr @plus)
155+
; CHECK-NEXT: [[ADD0:%.*]] = add i64 [[OP0]], [[OP1]]
156+
; CHECK-NEXT: [[ADD1:%.*]] = add i64 [[ADD0]], [[OP2]]
157+
; CHECK-NEXT: [[DIV:%.*]] = sdiv i64 [[ADD1]], [[X]]
158+
; CHECK-NEXT: [[SUB:%.*]] = sub i64 [[DIV]], [[Y]]
159+
; CHECK-NEXT: [[MUL:%.*]] = mul i64 [[SUB]], 2
160+
; CHECK-NEXT: ret i64 [[MUL]]
161+
;

0 commit comments

Comments (0)