Skip to content

Commit 75dfac5

Browse files
committed
!fixup add option enabling parallel reduction phis
1 parent 994e218 commit 75dfac5

File tree

7 files changed

+51
-77
lines changed

7 files changed

+51
-77
lines changed

llvm/lib/Transforms/Utils/LoopUnroll.cpp

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -109,6 +109,9 @@ UnrollVerifyLoopInfo("unroll-verify-loopinfo", cl::Hidden,
109109
#endif
110110
);
111111

112+
static cl::opt<bool> UnrollAddParallelReductions(
113+
"unroll-add-parallel-reductions", cl::init(false), cl::Hidden,
114+
cl::desc("Allow unrolling to add parallel reduction phis."));
112115

113116
/// Check if unrolling created a situation where we need to insert phi nodes to
114117
/// preserve LCSSA form.
@@ -668,7 +671,8 @@ llvm::UnrollLoop(Loop *L, UnrollLoopOptions ULO, LoopInfo *LI,
668671
// to not exit.
669672
DenseMap<PHINode *, RecurrenceDescriptor> Reductions;
670673
bool CanAddAdditionalAccumulators =
671-
!CompletelyUnroll && L->getNumBlocks() == 1 &&
674+
UnrollAddParallelReductions && !CompletelyUnroll &&
675+
L->getNumBlocks() == 1 &&
672676
(ULO.Runtime ||
673677
(ExitInfos.contains(Header) && ((ExitInfos[Header].TripCount != 0 &&
674678
ExitInfos[Header].BreakoutTrip == 0))));

llvm/test/Transforms/LoopUnroll/AArch64/apple-unrolling.ll

Lines changed: 17 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -801,33 +801,27 @@ define i32 @test_add_reduction_unroll_partial(ptr %a, i64 noundef %n) {
801801
; OTHER-NEXT: br label %[[LOOP:.*]]
802802
; OTHER: [[LOOP]]:
803803
; OTHER-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_NEXT_3:%.*]], %[[LOOP]] ]
804-
; OTHER-NEXT: [[RDX_1:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[RDX_NEXT_1:%.*]], %[[LOOP]] ]
805-
; OTHER-NEXT: [[RDX_2:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[RDX_NEXT_2:%.*]], %[[LOOP]] ]
806-
; OTHER-NEXT: [[RDX_3:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[RDX_NEXT_3:%.*]], %[[LOOP]] ]
807-
; OTHER-NEXT: [[RDX:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[RDX_NEXT:%.*]], %[[LOOP]] ]
804+
; OTHER-NEXT: [[RDX:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[RDX_NEXT_3:%.*]], %[[LOOP]] ]
808805
; OTHER-NEXT: [[GEP_A:%.*]] = getelementptr inbounds nuw i32, ptr [[A]], i64 [[IV]]
809806
; OTHER-NEXT: [[TMP0:%.*]] = load i32, ptr [[GEP_A]], align 2
810-
; OTHER-NEXT: [[RDX_NEXT]] = add i32 [[RDX]], [[TMP0]]
807+
; OTHER-NEXT: [[RDX_NEXT:%.*]] = add nuw nsw i32 [[RDX]], [[TMP0]]
811808
; OTHER-NEXT: [[IV_NEXT:%.*]] = add nuw nsw i64 [[IV]], 1
812809
; OTHER-NEXT: [[GEP_A_1:%.*]] = getelementptr inbounds nuw i32, ptr [[A]], i64 [[IV_NEXT]]
813810
; OTHER-NEXT: [[TMP1:%.*]] = load i32, ptr [[GEP_A_1]], align 2
814-
; OTHER-NEXT: [[RDX_NEXT_1]] = add i32 [[RDX_1]], [[TMP1]]
811+
; OTHER-NEXT: [[RDX_2:%.*]] = add nuw nsw i32 [[RDX_NEXT]], [[TMP1]]
815812
; OTHER-NEXT: [[IV_NEXT_1:%.*]] = add nuw nsw i64 [[IV]], 2
816813
; OTHER-NEXT: [[GEP_A_2:%.*]] = getelementptr inbounds nuw i32, ptr [[A]], i64 [[IV_NEXT_1]]
817814
; OTHER-NEXT: [[TMP2:%.*]] = load i32, ptr [[GEP_A_2]], align 2
818-
; OTHER-NEXT: [[RDX_NEXT_2]] = add i32 [[RDX_2]], [[TMP2]]
815+
; OTHER-NEXT: [[RDX_NEXT_2:%.*]] = add nuw nsw i32 [[RDX_2]], [[TMP2]]
819816
; OTHER-NEXT: [[IV_NEXT_2:%.*]] = add nuw nsw i64 [[IV]], 3
820817
; OTHER-NEXT: [[GEP_A_3:%.*]] = getelementptr inbounds nuw i32, ptr [[A]], i64 [[IV_NEXT_2]]
821818
; OTHER-NEXT: [[TMP3:%.*]] = load i32, ptr [[GEP_A_3]], align 2
822-
; OTHER-NEXT: [[RDX_NEXT_3]] = add i32 [[RDX_3]], [[TMP3]]
819+
; OTHER-NEXT: [[RDX_NEXT_3]] = add nuw nsw i32 [[RDX_NEXT_2]], [[TMP3]]
823820
; OTHER-NEXT: [[IV_NEXT_3]] = add nuw nsw i64 [[IV]], 4
824821
; OTHER-NEXT: [[EC_3:%.*]] = icmp eq i64 [[IV_NEXT_3]], 1024
825822
; OTHER-NEXT: br i1 [[EC_3]], label %[[EXIT:.*]], label %[[LOOP]]
826823
; OTHER: [[EXIT]]:
827-
; OTHER-NEXT: [[RES:%.*]] = phi i32 [ [[RDX_NEXT_3]], %[[LOOP]] ]
828-
; OTHER-NEXT: [[BIN_RDX:%.*]] = add i32 [[RDX_NEXT_1]], [[RDX_NEXT]]
829-
; OTHER-NEXT: [[BIN_RDX1:%.*]] = add i32 [[RDX_NEXT_2]], [[BIN_RDX]]
830-
; OTHER-NEXT: [[BIN_RDX2:%.*]] = add i32 [[RDX_NEXT_3]], [[BIN_RDX1]]
824+
; OTHER-NEXT: [[BIN_RDX2:%.*]] = phi i32 [ [[RDX_NEXT_3]], %[[LOOP]] ]
831825
; OTHER-NEXT: ret i32 [[BIN_RDX2]]
832826
;
833827
entry:
@@ -951,25 +945,23 @@ define i32 @test_add_and_mul_reduction_unroll_partial(ptr %a, i64 noundef %n) {
951945
; OTHER-NEXT: br label %[[LOOP:.*]]
952946
; OTHER: [[LOOP]]:
953947
; OTHER-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_NEXT_1:%.*]], %[[LOOP]] ]
954-
; OTHER-NEXT: [[RDX_1:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[RDX_NEXT_1:%.*]], %[[LOOP]] ]
955-
; OTHER-NEXT: [[RDX:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[RDX_NEXT:%.*]], %[[LOOP]] ]
948+
; OTHER-NEXT: [[RDX:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[RDX_NEXT_1:%.*]], %[[LOOP]] ]
956949
; OTHER-NEXT: [[RDX_2:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[RDX_2_NEXT_1:%.*]], %[[LOOP]] ]
957950
; OTHER-NEXT: [[GEP_A:%.*]] = getelementptr inbounds nuw i32, ptr [[A]], i64 [[IV]]
958951
; OTHER-NEXT: [[TMP0:%.*]] = load i32, ptr [[GEP_A]], align 2
959-
; OTHER-NEXT: [[RDX_NEXT]] = add i32 [[RDX]], [[TMP0]]
952+
; OTHER-NEXT: [[RDX_NEXT:%.*]] = add nuw nsw i32 [[RDX]], [[TMP0]]
960953
; OTHER-NEXT: [[RDX_2_NEXT:%.*]] = mul i32 [[RDX_2]], [[TMP0]]
961954
; OTHER-NEXT: [[IV_NEXT:%.*]] = add nuw nsw i64 [[IV]], 1
962955
; OTHER-NEXT: [[GEP_A_1:%.*]] = getelementptr inbounds nuw i32, ptr [[A]], i64 [[IV_NEXT]]
963956
; OTHER-NEXT: [[TMP1:%.*]] = load i32, ptr [[GEP_A_1]], align 2
964-
; OTHER-NEXT: [[RDX_NEXT_1]] = add i32 [[RDX_1]], [[TMP1]]
957+
; OTHER-NEXT: [[RDX_NEXT_1]] = add nuw nsw i32 [[RDX_NEXT]], [[TMP1]]
965958
; OTHER-NEXT: [[RDX_2_NEXT_1]] = mul i32 [[RDX_2_NEXT]], [[TMP1]]
966959
; OTHER-NEXT: [[IV_NEXT_1]] = add nuw nsw i64 [[IV]], 2
967960
; OTHER-NEXT: [[EC_1:%.*]] = icmp eq i64 [[IV_NEXT_1]], 1024
968961
; OTHER-NEXT: br i1 [[EC_1]], label %[[EXIT:.*]], label %[[LOOP]]
969962
; OTHER: [[EXIT]]:
970-
; OTHER-NEXT: [[RES_1:%.*]] = phi i32 [ [[RDX_NEXT_1]], %[[LOOP]] ]
963+
; OTHER-NEXT: [[BIN_RDX:%.*]] = phi i32 [ [[RDX_NEXT_1]], %[[LOOP]] ]
971964
; OTHER-NEXT: [[RES_2:%.*]] = phi i32 [ [[RDX_2_NEXT_1]], %[[LOOP]] ]
972-
; OTHER-NEXT: [[BIN_RDX:%.*]] = add i32 [[RDX_NEXT_1]], [[RDX_NEXT]]
973965
; OTHER-NEXT: [[SUM:%.*]] = add i32 [[BIN_RDX]], [[RES_2]]
974966
; OTHER-NEXT: ret i32 [[SUM]]
975967
;
@@ -1026,26 +1018,23 @@ define i32 @test_add_reduction_runtime(ptr %a, i64 noundef %n) {
10261018
; OTHER-NEXT: br label %[[LOOP:.*]]
10271019
; OTHER: [[LOOP]]:
10281020
; OTHER-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[ENTRY_NEW]] ], [ [[IV_NEXT_3:%.*]], %[[LOOP]] ]
1029-
; OTHER-NEXT: [[RDX_1:%.*]] = phi i32 [ 0, %[[ENTRY_NEW]] ], [ [[RDX_NEXT_1:%.*]], %[[LOOP]] ]
1030-
; OTHER-NEXT: [[RDX_2:%.*]] = phi i32 [ 0, %[[ENTRY_NEW]] ], [ [[RDX_NEXT_2:%.*]], %[[LOOP]] ]
1031-
; OTHER-NEXT: [[RDX_3:%.*]] = phi i32 [ 0, %[[ENTRY_NEW]] ], [ [[RDX_NEXT_3:%.*]], %[[LOOP]] ]
1032-
; OTHER-NEXT: [[RDX:%.*]] = phi i32 [ 0, %[[ENTRY_NEW]] ], [ [[RDX_NEXT:%.*]], %[[LOOP]] ]
1021+
; OTHER-NEXT: [[RDX:%.*]] = phi i32 [ 0, %[[ENTRY_NEW]] ], [ [[RDX_NEXT_3:%.*]], %[[LOOP]] ]
10331022
; OTHER-NEXT: [[NITER:%.*]] = phi i64 [ 0, %[[ENTRY_NEW]] ], [ [[NITER_NEXT_3:%.*]], %[[LOOP]] ]
10341023
; OTHER-NEXT: [[GEP_A:%.*]] = getelementptr inbounds nuw i32, ptr [[A]], i64 [[IV]]
10351024
; OTHER-NEXT: [[TMP2:%.*]] = load i32, ptr [[GEP_A]], align 2
1036-
; OTHER-NEXT: [[RDX_NEXT]] = add i32 [[RDX]], [[TMP2]]
1025+
; OTHER-NEXT: [[RDX_NEXT:%.*]] = add nuw nsw i32 [[RDX]], [[TMP2]]
10371026
; OTHER-NEXT: [[IV_NEXT:%.*]] = add nuw nsw i64 [[IV]], 1
10381027
; OTHER-NEXT: [[GEP_A_1:%.*]] = getelementptr inbounds nuw i32, ptr [[A]], i64 [[IV_NEXT]]
10391028
; OTHER-NEXT: [[TMP3:%.*]] = load i32, ptr [[GEP_A_1]], align 2
1040-
; OTHER-NEXT: [[RDX_NEXT_1]] = add i32 [[RDX_1]], [[TMP3]]
1029+
; OTHER-NEXT: [[RDX_2:%.*]] = add nuw nsw i32 [[RDX_NEXT]], [[TMP3]]
10411030
; OTHER-NEXT: [[IV_NEXT_1:%.*]] = add nuw nsw i64 [[IV]], 2
10421031
; OTHER-NEXT: [[GEP_A_2:%.*]] = getelementptr inbounds nuw i32, ptr [[A]], i64 [[IV_NEXT_1]]
10431032
; OTHER-NEXT: [[TMP4:%.*]] = load i32, ptr [[GEP_A_2]], align 2
1044-
; OTHER-NEXT: [[RDX_NEXT_2]] = add i32 [[RDX_2]], [[TMP4]]
1033+
; OTHER-NEXT: [[RDX_NEXT_2:%.*]] = add nuw nsw i32 [[RDX_2]], [[TMP4]]
10451034
; OTHER-NEXT: [[IV_NEXT_2:%.*]] = add nuw nsw i64 [[IV]], 3
10461035
; OTHER-NEXT: [[GEP_A_3:%.*]] = getelementptr inbounds nuw i32, ptr [[A]], i64 [[IV_NEXT_2]]
10471036
; OTHER-NEXT: [[TMP5:%.*]] = load i32, ptr [[GEP_A_3]], align 2
1048-
; OTHER-NEXT: [[RDX_NEXT_3]] = add i32 [[RDX_3]], [[TMP5]]
1037+
; OTHER-NEXT: [[RDX_NEXT_3]] = add nuw nsw i32 [[RDX_NEXT_2]], [[TMP5]]
10491038
; OTHER-NEXT: [[IV_NEXT_3]] = add nuw nsw i64 [[IV]], 4
10501039
; OTHER-NEXT: [[NITER_NEXT_3]] = add i64 [[NITER]], 4
10511040
; OTHER-NEXT: [[NITER_NCMP_3:%.*]] = icmp eq i64 [[NITER_NEXT_3]], [[UNROLL_ITER]]
@@ -1054,14 +1043,11 @@ define i32 @test_add_reduction_runtime(ptr %a, i64 noundef %n) {
10541043
; OTHER-NEXT: [[RES_PH_PH:%.*]] = phi i32 [ [[RDX_NEXT_3]], %[[LOOP]] ]
10551044
; OTHER-NEXT: [[IV_UNR_PH:%.*]] = phi i64 [ [[IV_NEXT_3]], %[[LOOP]] ]
10561045
; OTHER-NEXT: [[RDX_UNR_PH:%.*]] = phi i32 [ [[RDX_NEXT_3]], %[[LOOP]] ]
1057-
; OTHER-NEXT: [[BIN_RDX:%.*]] = add i32 [[RDX_NEXT_1]], [[RDX_NEXT]]
1058-
; OTHER-NEXT: [[BIN_RDX2:%.*]] = add i32 [[RDX_NEXT_2]], [[BIN_RDX]]
1059-
; OTHER-NEXT: [[BIN_RDX3:%.*]] = add i32 [[RDX_NEXT_3]], [[BIN_RDX2]]
10601046
; OTHER-NEXT: br label %[[EXIT_UNR_LCSSA]]
10611047
; OTHER: [[EXIT_UNR_LCSSA]]:
1062-
; OTHER-NEXT: [[RES_PH:%.*]] = phi i32 [ poison, %[[ENTRY]] ], [ [[BIN_RDX3]], %[[EXIT_UNR_LCSSA_LOOPEXIT]] ]
1048+
; OTHER-NEXT: [[RES_PH:%.*]] = phi i32 [ poison, %[[ENTRY]] ], [ [[RES_PH_PH]], %[[EXIT_UNR_LCSSA_LOOPEXIT]] ]
10631049
; OTHER-NEXT: [[IV_UNR:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_UNR_PH]], %[[EXIT_UNR_LCSSA_LOOPEXIT]] ]
1064-
; OTHER-NEXT: [[RDX_UNR:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[BIN_RDX3]], %[[EXIT_UNR_LCSSA_LOOPEXIT]] ]
1050+
; OTHER-NEXT: [[RDX_UNR:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[RDX_UNR_PH]], %[[EXIT_UNR_LCSSA_LOOPEXIT]] ]
10651051
; OTHER-NEXT: [[LCMP_MOD:%.*]] = icmp ne i64 [[XTRAITER]], 0
10661052
; OTHER-NEXT: br i1 [[LCMP_MOD]], label %[[LOOP_EPIL_PREHEADER:.*]], label %[[EXIT:.*]]
10671053
; OTHER: [[LOOP_EPIL_PREHEADER]]:

llvm/test/Transforms/LoopUnroll/ARM/instr-size-costs.ll

Lines changed: 6 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -196,14 +196,13 @@ define i32 @test_i32_select_optsize(ptr %a, ptr %b, ptr %c) #0 {
196196
; CHECK-V8: loop:
197197
; CHECK-V8-NEXT: [[IV:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[COUNT_1:%.*]], [[LOOP]] ]
198198
; CHECK-V8-NEXT: [[ACC:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[ACC_NEXT_1:%.*]], [[LOOP]] ]
199-
; CHECK-V8-NEXT: [[ACC1:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[ACC_NEXT_2:%.*]], [[LOOP]] ]
200199
; CHECK-V8-NEXT: [[ADDR_A:%.*]] = getelementptr i32, ptr [[A:%.*]], i32 [[IV]]
201200
; CHECK-V8-NEXT: [[ADDR_B:%.*]] = getelementptr i32, ptr [[B:%.*]], i32 [[IV]]
202201
; CHECK-V8-NEXT: [[DATA_A:%.*]] = load i32, ptr [[ADDR_A]], align 4
203202
; CHECK-V8-NEXT: [[DATA_B:%.*]] = load i32, ptr [[ADDR_B]], align 4
204203
; CHECK-V8-NEXT: [[UGT:%.*]] = icmp ugt i32 [[DATA_A]], [[DATA_B]]
205204
; CHECK-V8-NEXT: [[UMAX:%.*]] = select i1 [[UGT]], i32 [[DATA_A]], i32 [[DATA_B]]
206-
; CHECK-V8-NEXT: [[ACC_NEXT_2]] = add i32 [[UMAX]], [[ACC1]]
205+
; CHECK-V8-NEXT: [[ACC_NEXT:%.*]] = add i32 [[UMAX]], [[ACC]]
207206
; CHECK-V8-NEXT: [[ADDR_C:%.*]] = getelementptr i32, ptr [[C:%.*]], i32 [[IV]]
208207
; CHECK-V8-NEXT: store i32 [[UMAX]], ptr [[ADDR_C]], align 4
209208
; CHECK-V8-NEXT: [[COUNT:%.*]] = add nuw nsw i32 [[IV]], 1
@@ -213,15 +212,14 @@ define i32 @test_i32_select_optsize(ptr %a, ptr %b, ptr %c) #0 {
213212
; CHECK-V8-NEXT: [[DATA_B_1:%.*]] = load i32, ptr [[ADDR_B_1]], align 4
214213
; CHECK-V8-NEXT: [[UGT_1:%.*]] = icmp ugt i32 [[DATA_A_1]], [[DATA_B_1]]
215214
; CHECK-V8-NEXT: [[UMAX_1:%.*]] = select i1 [[UGT_1]], i32 [[DATA_A_1]], i32 [[DATA_B_1]]
216-
; CHECK-V8-NEXT: [[ACC_NEXT_1]] = add i32 [[UMAX_1]], [[ACC]]
215+
; CHECK-V8-NEXT: [[ACC_NEXT_1]] = add i32 [[UMAX_1]], [[ACC_NEXT]]
217216
; CHECK-V8-NEXT: [[ADDR_C_1:%.*]] = getelementptr i32, ptr [[C]], i32 [[COUNT]]
218217
; CHECK-V8-NEXT: store i32 [[UMAX_1]], ptr [[ADDR_C_1]], align 4
219218
; CHECK-V8-NEXT: [[COUNT_1]] = add nuw nsw i32 [[IV]], 2
220219
; CHECK-V8-NEXT: [[END_1:%.*]] = icmp ne i32 [[COUNT_1]], 100
221220
; CHECK-V8-NEXT: br i1 [[END_1]], label [[LOOP]], label [[EXIT:%.*]]
222221
; CHECK-V8: exit:
223-
; CHECK-V8-NEXT: [[ACC_NEXT_LCSSA1:%.*]] = phi i32 [ [[ACC_NEXT_1]], [[LOOP]] ]
224-
; CHECK-V8-NEXT: [[ACC_NEXT_LCSSA:%.*]] = add i32 [[ACC_NEXT_1]], [[ACC_NEXT_2]]
222+
; CHECK-V8-NEXT: [[ACC_NEXT_LCSSA:%.*]] = phi i32 [ [[ACC_NEXT_1]], [[LOOP]] ]
225223
; CHECK-V8-NEXT: ret i32 [[ACC_NEXT_LCSSA]]
226224
;
227225
entry:
@@ -254,14 +252,13 @@ define i32 @test_i32_select_minsize(ptr %a, ptr %b, ptr %c) #1 {
254252
; CHECK-V8: loop:
255253
; CHECK-V8-NEXT: [[IV:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[COUNT_1:%.*]], [[LOOP]] ]
256254
; CHECK-V8-NEXT: [[ACC:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[ACC_NEXT_1:%.*]], [[LOOP]] ]
257-
; CHECK-V8-NEXT: [[ACC1:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[ACC_NEXT_2:%.*]], [[LOOP]] ]
258255
; CHECK-V8-NEXT: [[ADDR_A:%.*]] = getelementptr i32, ptr [[A:%.*]], i32 [[IV]]
259256
; CHECK-V8-NEXT: [[ADDR_B:%.*]] = getelementptr i32, ptr [[B:%.*]], i32 [[IV]]
260257
; CHECK-V8-NEXT: [[DATA_A:%.*]] = load i32, ptr [[ADDR_A]], align 4
261258
; CHECK-V8-NEXT: [[DATA_B:%.*]] = load i32, ptr [[ADDR_B]], align 4
262259
; CHECK-V8-NEXT: [[UGT:%.*]] = icmp ugt i32 [[DATA_A]], [[DATA_B]]
263260
; CHECK-V8-NEXT: [[UMAX:%.*]] = select i1 [[UGT]], i32 [[DATA_A]], i32 [[DATA_B]]
264-
; CHECK-V8-NEXT: [[ACC_NEXT_2]] = add i32 [[UMAX]], [[ACC1]]
261+
; CHECK-V8-NEXT: [[ACC_NEXT:%.*]] = add i32 [[UMAX]], [[ACC]]
265262
; CHECK-V8-NEXT: [[ADDR_C:%.*]] = getelementptr i32, ptr [[C:%.*]], i32 [[IV]]
266263
; CHECK-V8-NEXT: store i32 [[UMAX]], ptr [[ADDR_C]], align 4
267264
; CHECK-V8-NEXT: [[COUNT:%.*]] = add nuw nsw i32 [[IV]], 1
@@ -271,15 +268,14 @@ define i32 @test_i32_select_minsize(ptr %a, ptr %b, ptr %c) #1 {
271268
; CHECK-V8-NEXT: [[DATA_B_1:%.*]] = load i32, ptr [[ADDR_B_1]], align 4
272269
; CHECK-V8-NEXT: [[UGT_1:%.*]] = icmp ugt i32 [[DATA_A_1]], [[DATA_B_1]]
273270
; CHECK-V8-NEXT: [[UMAX_1:%.*]] = select i1 [[UGT_1]], i32 [[DATA_A_1]], i32 [[DATA_B_1]]
274-
; CHECK-V8-NEXT: [[ACC_NEXT_1]] = add i32 [[UMAX_1]], [[ACC]]
271+
; CHECK-V8-NEXT: [[ACC_NEXT_1]] = add i32 [[UMAX_1]], [[ACC_NEXT]]
275272
; CHECK-V8-NEXT: [[ADDR_C_1:%.*]] = getelementptr i32, ptr [[C]], i32 [[COUNT]]
276273
; CHECK-V8-NEXT: store i32 [[UMAX_1]], ptr [[ADDR_C_1]], align 4
277274
; CHECK-V8-NEXT: [[COUNT_1]] = add nuw nsw i32 [[IV]], 2
278275
; CHECK-V8-NEXT: [[END_1:%.*]] = icmp ne i32 [[COUNT_1]], 100
279276
; CHECK-V8-NEXT: br i1 [[END_1]], label [[LOOP]], label [[EXIT:%.*]]
280277
; CHECK-V8: exit:
281-
; CHECK-V8-NEXT: [[ACC_NEXT_LCSSA1:%.*]] = phi i32 [ [[ACC_NEXT_1]], [[LOOP]] ]
282-
; CHECK-V8-NEXT: [[ACC_NEXT_LCSSA:%.*]] = add i32 [[ACC_NEXT_1]], [[ACC_NEXT_2]]
278+
; CHECK-V8-NEXT: [[ACC_NEXT_LCSSA:%.*]] = phi i32 [ [[ACC_NEXT_1]], [[LOOP]] ]
283279
; CHECK-V8-NEXT: ret i32 [[ACC_NEXT_LCSSA]]
284280
;
285281
entry:

llvm/test/Transforms/LoopUnroll/partial-unroll-reductions.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
2-
; RUN: opt -p loop-unroll -unroll-allow-partial -unroll-max-count=4 -S %s | FileCheck %s
2+
; RUN: opt -p loop-unroll -unroll-add-parallel-reductions -unroll-allow-partial -unroll-max-count=4 -S %s | FileCheck %s
33

44
define i32 @test_add(ptr %src, i64 %n, i32 %start) {
55
; CHECK-LABEL: define i32 @test_add(

llvm/test/Transforms/LoopUnroll/runtime-loop5.ll

Lines changed: 7 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -75,26 +75,23 @@ define i3 @test(ptr %a, i3 %n) {
7575
; UNROLL-4-NEXT: br label [[FOR_BODY:%.*]]
7676
; UNROLL-4: for.body:
7777
; UNROLL-4-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[FOR_BODY_PREHEADER_NEW]] ], [ [[INDVARS_IV_NEXT_3:%.*]], [[FOR_BODY]] ]
78-
; UNROLL-4-NEXT: [[SUM_02_1:%.*]] = phi i3 [ [[ADD:%.*]], [[FOR_BODY]] ], [ 0, [[FOR_BODY_PREHEADER_NEW]] ]
79-
; UNROLL-4-NEXT: [[ADD_1:%.*]] = phi i3 [ [[ADD_2:%.*]], [[FOR_BODY]] ], [ 0, [[FOR_BODY_PREHEADER_NEW]] ]
80-
; UNROLL-4-NEXT: [[SUM_02_3:%.*]] = phi i3 [ [[ADD_3:%.*]], [[FOR_BODY]] ], [ 0, [[FOR_BODY_PREHEADER_NEW]] ]
81-
; UNROLL-4-NEXT: [[SUM_02:%.*]] = phi i3 [ [[ADD1:%.*]], [[FOR_BODY]] ], [ 0, [[FOR_BODY_PREHEADER_NEW]] ]
78+
; UNROLL-4-NEXT: [[SUM_02:%.*]] = phi i3 [ 0, [[FOR_BODY_PREHEADER_NEW]] ], [ [[ADD_3:%.*]], [[FOR_BODY]] ]
8279
; UNROLL-4-NEXT: [[NITER:%.*]] = phi i3 [ 0, [[FOR_BODY_PREHEADER_NEW]] ], [ [[NITER_NEXT_3:%.*]], [[FOR_BODY]] ]
8380
; UNROLL-4-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i3, ptr [[A:%.*]], i64 [[INDVARS_IV]]
8481
; UNROLL-4-NEXT: [[TMP2:%.*]] = load i3, ptr [[ARRAYIDX]], align 1
85-
; UNROLL-4-NEXT: [[ADD1]] = add i3 [[TMP2]], [[SUM_02]]
82+
; UNROLL-4-NEXT: [[ADD:%.*]] = add nsw i3 [[TMP2]], [[SUM_02]]
8683
; UNROLL-4-NEXT: [[INDVARS_IV_NEXT:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 1
8784
; UNROLL-4-NEXT: [[ARRAYIDX_1:%.*]] = getelementptr inbounds i3, ptr [[A]], i64 [[INDVARS_IV_NEXT]]
8885
; UNROLL-4-NEXT: [[TMP3:%.*]] = load i3, ptr [[ARRAYIDX_1]], align 1
89-
; UNROLL-4-NEXT: [[ADD]] = add i3 [[TMP3]], [[SUM_02_1]]
86+
; UNROLL-4-NEXT: [[ADD_1:%.*]] = add nsw i3 [[TMP3]], [[ADD]]
9087
; UNROLL-4-NEXT: [[INDVARS_IV_NEXT_1:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 2
9188
; UNROLL-4-NEXT: [[ARRAYIDX_2:%.*]] = getelementptr inbounds i3, ptr [[A]], i64 [[INDVARS_IV_NEXT_1]]
9289
; UNROLL-4-NEXT: [[TMP4:%.*]] = load i3, ptr [[ARRAYIDX_2]], align 1
93-
; UNROLL-4-NEXT: [[ADD_2]] = add i3 [[TMP4]], [[ADD_1]]
90+
; UNROLL-4-NEXT: [[ADD_2:%.*]] = add nsw i3 [[TMP4]], [[ADD_1]]
9491
; UNROLL-4-NEXT: [[INDVARS_IV_NEXT_2:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 3
9592
; UNROLL-4-NEXT: [[ARRAYIDX_3:%.*]] = getelementptr inbounds i3, ptr [[A]], i64 [[INDVARS_IV_NEXT_2]]
9693
; UNROLL-4-NEXT: [[TMP5:%.*]] = load i3, ptr [[ARRAYIDX_3]], align 1
97-
; UNROLL-4-NEXT: [[ADD_3]] = add i3 [[TMP5]], [[SUM_02_3]]
94+
; UNROLL-4-NEXT: [[ADD_3]] = add nsw i3 [[TMP5]], [[ADD_2]]
9895
; UNROLL-4-NEXT: [[INDVARS_IV_NEXT_3]] = add nuw nsw i64 [[INDVARS_IV]], 4
9996
; UNROLL-4-NEXT: [[NITER_NEXT_3]] = add i3 [[NITER]], -4
10097
; UNROLL-4-NEXT: [[NITER_NCMP_3:%.*]] = icmp eq i3 [[NITER_NEXT_3]], [[UNROLL_ITER]]
@@ -103,14 +100,11 @@ define i3 @test(ptr %a, i3 %n) {
103100
; UNROLL-4-NEXT: [[ADD_LCSSA_PH_PH:%.*]] = phi i3 [ [[ADD_3]], [[FOR_BODY]] ]
104101
; UNROLL-4-NEXT: [[INDVARS_IV_UNR_PH:%.*]] = phi i64 [ [[INDVARS_IV_NEXT_3]], [[FOR_BODY]] ]
105102
; UNROLL-4-NEXT: [[SUM_02_UNR_PH:%.*]] = phi i3 [ [[ADD_3]], [[FOR_BODY]] ]
106-
; UNROLL-4-NEXT: [[BIN_RDX:%.*]] = add i3 [[ADD]], [[ADD1]]
107-
; UNROLL-4-NEXT: [[BIN_RDX2:%.*]] = add i3 [[ADD_2]], [[BIN_RDX]]
108-
; UNROLL-4-NEXT: [[BIN_RDX3:%.*]] = add i3 [[ADD_3]], [[BIN_RDX2]]
109103
; UNROLL-4-NEXT: br label [[FOR_END_LOOPEXIT_UNR_LCSSA]]
110104
; UNROLL-4: for.end.loopexit.unr-lcssa:
111-
; UNROLL-4-NEXT: [[ADD_LCSSA_PH:%.*]] = phi i3 [ poison, [[FOR_BODY_PREHEADER]] ], [ [[BIN_RDX3]], [[FOR_END_LOOPEXIT_UNR_LCSSA_LOOPEXIT]] ]
105+
; UNROLL-4-NEXT: [[ADD_LCSSA_PH:%.*]] = phi i3 [ poison, [[FOR_BODY_PREHEADER]] ], [ [[ADD_LCSSA_PH_PH]], [[FOR_END_LOOPEXIT_UNR_LCSSA_LOOPEXIT]] ]
112106
; UNROLL-4-NEXT: [[INDVARS_IV_UNR:%.*]] = phi i64 [ 0, [[FOR_BODY_PREHEADER]] ], [ [[INDVARS_IV_UNR_PH]], [[FOR_END_LOOPEXIT_UNR_LCSSA_LOOPEXIT]] ]
113-
; UNROLL-4-NEXT: [[SUM_02_UNR:%.*]] = phi i3 [ 0, [[FOR_BODY_PREHEADER]] ], [ [[BIN_RDX3]], [[FOR_END_LOOPEXIT_UNR_LCSSA_LOOPEXIT]] ]
107+
; UNROLL-4-NEXT: [[SUM_02_UNR:%.*]] = phi i3 [ 0, [[FOR_BODY_PREHEADER]] ], [ [[SUM_02_UNR_PH]], [[FOR_END_LOOPEXIT_UNR_LCSSA_LOOPEXIT]] ]
114108
; UNROLL-4-NEXT: [[LCMP_MOD:%.*]] = icmp ne i3 [[XTRAITER]], 0
115109
; UNROLL-4-NEXT: br i1 [[LCMP_MOD]], label [[FOR_BODY_EPIL_PREHEADER:%.*]], label [[FOR_END_LOOPEXIT:%.*]]
116110
; UNROLL-4: for.body.epil.preheader:

llvm/test/Transforms/LoopUnroll/runtime-unroll-reductions.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
2-
; RUN: opt -p loop-unroll -S %s | FileCheck %s
2+
; RUN: opt -p loop-unroll -unroll-add-parallel-reductions -S %s | FileCheck %s
33

44
define i32 @test_add_reduction(ptr %a, i64 %n) {
55
; CHECK-LABEL: define i32 @test_add_reduction(

0 commit comments

Comments
 (0)