Skip to content

Commit c431cb1

Browse files
committed
fixup! [LoopUnroll] Introduce parallel accumulators when unrolling FP reductions. (llvm#166630)
1 parent 2505bc9 commit c431cb1

File tree

1 file changed

+52
-48
lines changed

1 file changed

+52
-48
lines changed

llvm/test/Transforms/LoopUnroll/runtime-unroll-reductions.ll

Lines changed: 52 additions & 48 deletions
Original file line numberDiff line numberDiff line change
@@ -230,7 +230,7 @@ define <4 x i32> @test_vector_add_reduction(ptr %a, i64 %n) {
230230
; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[N]], -1
231231
; CHECK-NEXT: [[XTRAITER:%.*]] = and i64 [[N]], 1
232232
; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i64 [[TMP0]], 1
233-
; CHECK-NEXT: br i1 [[TMP1]], label %[[LOOP_EPIL_PREHEADER:.*]], label %[[ENTRY_NEW:.*]]
233+
; CHECK-NEXT: br i1 [[TMP1]], label %[[EXIT_UNR_LCSSA:.*]], label %[[ENTRY_NEW:.*]]
234234
; CHECK: [[ENTRY_NEW]]:
235235
; CHECK-NEXT: [[UNROLL_ITER:%.*]] = sub i64 [[N]], [[XTRAITER]]
236236
; CHECK-NEXT: br label %[[LOOP:.*]]
@@ -249,27 +249,28 @@ define <4 x i32> @test_vector_add_reduction(ptr %a, i64 %n) {
249249
; CHECK-NEXT: [[IV_NEXT_1]] = add nuw nsw i64 [[IV]], 2
250250
; CHECK-NEXT: [[NITER_NEXT_1]] = add i64 [[NITER]], 2
251251
; CHECK-NEXT: [[NITER_NCMP_1:%.*]] = icmp eq i64 [[NITER_NEXT_1]], [[UNROLL_ITER]]
252-
; CHECK-NEXT: br i1 [[NITER_NCMP_1]], label %[[EXIT_UNR_LCSSA:.*]], label %[[LOOP]], !llvm.loop [[LOOP5:![0-9]+]]
253-
; CHECK: [[EXIT_UNR_LCSSA]]:
254-
; CHECK-NEXT: [[RES_PH:%.*]] = phi <4 x i32> [ [[RDX_NEXT_1]], %[[LOOP]] ]
255-
; CHECK-NEXT: [[IV_UNR:%.*]] = phi i64 [ [[IV_NEXT_1]], %[[LOOP]] ]
256-
; CHECK-NEXT: [[RDX_UNR:%.*]] = phi <4 x i32> [ [[RDX_NEXT_1]], %[[LOOP]] ]
252+
; CHECK-NEXT: br i1 [[NITER_NCMP_1]], label %[[EXIT_UNR_LCSSA_LOOPEXIT:.*]], label %[[LOOP]], !llvm.loop [[LOOP5:![0-9]+]]
253+
; CHECK: [[EXIT_UNR_LCSSA_LOOPEXIT]]:
254+
; CHECK-NEXT: [[RES_PH_PH:%.*]] = phi <4 x i32> [ [[RDX_NEXT_1]], %[[LOOP]] ]
255+
; CHECK-NEXT: [[IV_UNR_PH:%.*]] = phi i64 [ [[IV_NEXT_1]], %[[LOOP]] ]
256+
; CHECK-NEXT: [[RDX_UNR_PH:%.*]] = phi <4 x i32> [ [[RDX_NEXT_1]], %[[LOOP]] ]
257257
; CHECK-NEXT: [[BIN_RDX:%.*]] = add <4 x i32> [[RDX_NEXT_1]], [[RDX_NEXT]]
258-
; CHECK-NEXT: [[LCMP_MOD:%.*]] = icmp ne i64 [[XTRAITER]], 0
259-
; CHECK-NEXT: br i1 [[LCMP_MOD]], label %[[LOOP_EPIL_PREHEADER]], label %[[EXIT:.*]]
260-
; CHECK: [[LOOP_EPIL_PREHEADER]]:
261-
; CHECK-NEXT: [[IV_EPIL_INIT:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_UNR]], %[[EXIT_UNR_LCSSA]] ]
262-
; CHECK-NEXT: [[RDX_EPIL_INIT:%.*]] = phi <4 x i32> [ zeroinitializer, %[[ENTRY]] ], [ [[BIN_RDX]], %[[EXIT_UNR_LCSSA]] ]
258+
; CHECK-NEXT: br label %[[EXIT_UNR_LCSSA]]
259+
; CHECK: [[EXIT_UNR_LCSSA]]:
260+
; CHECK-NEXT: [[RES_PH:%.*]] = phi <4 x i32> [ poison, %[[ENTRY]] ], [ [[BIN_RDX]], %[[EXIT_UNR_LCSSA_LOOPEXIT]] ]
261+
; CHECK-NEXT: [[IV_EPIL_INIT:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_UNR_PH]], %[[EXIT_UNR_LCSSA_LOOPEXIT]] ]
262+
; CHECK-NEXT: [[RDX_EPIL_INIT:%.*]] = phi <4 x i32> [ zeroinitializer, %[[ENTRY]] ], [ [[BIN_RDX]], %[[EXIT_UNR_LCSSA_LOOPEXIT]] ]
263263
; CHECK-NEXT: [[LCMP_MOD2:%.*]] = icmp ne i64 [[XTRAITER]], 0
264-
; CHECK-NEXT: call void @llvm.assume(i1 [[LCMP_MOD2]])
264+
; CHECK-NEXT: br i1 [[LCMP_MOD2]], label %[[LOOP_EPIL_PREHEADER:.*]], label %[[EXIT:.*]]
265+
; CHECK: [[LOOP_EPIL_PREHEADER]]:
265266
; CHECK-NEXT: br label %[[LOOP_EPIL:.*]]
266267
; CHECK: [[LOOP_EPIL]]:
267268
; CHECK-NEXT: [[GEP_A_EPIL:%.*]] = getelementptr inbounds nuw <4 x i32>, ptr [[A]], i64 [[IV_EPIL_INIT]]
268269
; CHECK-NEXT: [[TMP4:%.*]] = load <4 x i32>, ptr [[GEP_A_EPIL]], align 16
269270
; CHECK-NEXT: [[RDX_NEXT_EPIL:%.*]] = add <4 x i32> [[RDX_EPIL_INIT]], [[TMP4]]
270271
; CHECK-NEXT: br label %[[EXIT]]
271272
; CHECK: [[EXIT]]:
272-
; CHECK-NEXT: [[RES:%.*]] = phi <4 x i32> [ [[BIN_RDX]], %[[EXIT_UNR_LCSSA]] ], [ [[RDX_NEXT_EPIL]], %[[LOOP_EPIL]] ]
273+
; CHECK-NEXT: [[RES:%.*]] = phi <4 x i32> [ [[RES_PH]], %[[EXIT_UNR_LCSSA]] ], [ [[RDX_NEXT_EPIL]], %[[LOOP_EPIL]] ]
273274
; CHECK-NEXT: ret <4 x i32> [[RES]]
274275
;
275276
entry:
@@ -297,7 +298,7 @@ define float @test_fadd_reduction(ptr %a, i64 %n) {
297298
; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[N]], -1
298299
; CHECK-NEXT: [[XTRAITER:%.*]] = and i64 [[N]], 1
299300
; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i64 [[TMP0]], 1
300-
; CHECK-NEXT: br i1 [[TMP1]], label %[[LOOP_EPIL_PREHEADER:.*]], label %[[ENTRY_NEW:.*]]
301+
; CHECK-NEXT: br i1 [[TMP1]], label %[[EXIT_UNR_LCSSA:.*]], label %[[ENTRY_NEW:.*]]
301302
; CHECK: [[ENTRY_NEW]]:
302303
; CHECK-NEXT: [[UNROLL_ITER:%.*]] = sub i64 [[N]], [[XTRAITER]]
303304
; CHECK-NEXT: br label %[[LOOP:.*]]
@@ -316,27 +317,28 @@ define float @test_fadd_reduction(ptr %a, i64 %n) {
316317
; CHECK-NEXT: [[IV_NEXT_1]] = add nuw nsw i64 [[IV]], 2
317318
; CHECK-NEXT: [[NITER_NEXT_1]] = add i64 [[NITER]], 2
318319
; CHECK-NEXT: [[NITER_NCMP_1:%.*]] = icmp eq i64 [[NITER_NEXT_1]], [[UNROLL_ITER]]
319-
; CHECK-NEXT: br i1 [[NITER_NCMP_1]], label %[[EXIT_UNR_LCSSA:.*]], label %[[LOOP]], !llvm.loop [[LOOP6:![0-9]+]]
320-
; CHECK: [[EXIT_UNR_LCSSA]]:
321-
; CHECK-NEXT: [[RES_PH:%.*]] = phi float [ [[RDX_NEXT_1]], %[[LOOP]] ]
322-
; CHECK-NEXT: [[IV_UNR:%.*]] = phi i64 [ [[IV_NEXT_1]], %[[LOOP]] ]
323-
; CHECK-NEXT: [[RDX_UNR:%.*]] = phi float [ [[RDX_NEXT_1]], %[[LOOP]] ]
320+
; CHECK-NEXT: br i1 [[NITER_NCMP_1]], label %[[EXIT_UNR_LCSSA_LOOPEXIT:.*]], label %[[LOOP]], !llvm.loop [[LOOP6:![0-9]+]]
321+
; CHECK: [[EXIT_UNR_LCSSA_LOOPEXIT]]:
322+
; CHECK-NEXT: [[RES_PH_PH:%.*]] = phi float [ [[RDX_NEXT_1]], %[[LOOP]] ]
323+
; CHECK-NEXT: [[IV_UNR_PH:%.*]] = phi i64 [ [[IV_NEXT_1]], %[[LOOP]] ]
324+
; CHECK-NEXT: [[RDX_UNR_PH:%.*]] = phi float [ [[RDX_NEXT_1]], %[[LOOP]] ]
324325
; CHECK-NEXT: [[BIN_RDX:%.*]] = fadd reassoc float [[RDX_NEXT_1]], [[RDX_NEXT]]
325-
; CHECK-NEXT: [[LCMP_MOD:%.*]] = icmp ne i64 [[XTRAITER]], 0
326-
; CHECK-NEXT: br i1 [[LCMP_MOD]], label %[[LOOP_EPIL_PREHEADER]], label %[[EXIT:.*]]
327-
; CHECK: [[LOOP_EPIL_PREHEADER]]:
328-
; CHECK-NEXT: [[IV_EPIL_INIT:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_UNR]], %[[EXIT_UNR_LCSSA]] ]
329-
; CHECK-NEXT: [[RDX_EPIL_INIT:%.*]] = phi float [ 0.000000e+00, %[[ENTRY]] ], [ [[BIN_RDX]], %[[EXIT_UNR_LCSSA]] ]
326+
; CHECK-NEXT: br label %[[EXIT_UNR_LCSSA]]
327+
; CHECK: [[EXIT_UNR_LCSSA]]:
328+
; CHECK-NEXT: [[RES_PH:%.*]] = phi float [ poison, %[[ENTRY]] ], [ [[BIN_RDX]], %[[EXIT_UNR_LCSSA_LOOPEXIT]] ]
329+
; CHECK-NEXT: [[IV_EPIL_INIT:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_UNR_PH]], %[[EXIT_UNR_LCSSA_LOOPEXIT]] ]
330+
; CHECK-NEXT: [[RDX_EPIL_INIT:%.*]] = phi float [ 0.000000e+00, %[[ENTRY]] ], [ [[BIN_RDX]], %[[EXIT_UNR_LCSSA_LOOPEXIT]] ]
330331
; CHECK-NEXT: [[LCMP_MOD2:%.*]] = icmp ne i64 [[XTRAITER]], 0
331-
; CHECK-NEXT: call void @llvm.assume(i1 [[LCMP_MOD2]])
332+
; CHECK-NEXT: br i1 [[LCMP_MOD2]], label %[[LOOP_EPIL_PREHEADER:.*]], label %[[EXIT:.*]]
333+
; CHECK: [[LOOP_EPIL_PREHEADER]]:
332334
; CHECK-NEXT: br label %[[LOOP_EPIL:.*]]
333335
; CHECK: [[LOOP_EPIL]]:
334336
; CHECK-NEXT: [[GEP_A_EPIL:%.*]] = getelementptr inbounds nuw float, ptr [[A]], i64 [[IV_EPIL_INIT]]
335337
; CHECK-NEXT: [[TMP4:%.*]] = load float, ptr [[GEP_A_EPIL]], align 16
336338
; CHECK-NEXT: [[RDX_NEXT_EPIL:%.*]] = fadd reassoc float [[RDX_EPIL_INIT]], [[TMP4]]
337339
; CHECK-NEXT: br label %[[EXIT]]
338340
; CHECK: [[EXIT]]:
339-
; CHECK-NEXT: [[RES:%.*]] = phi float [ [[BIN_RDX]], %[[EXIT_UNR_LCSSA]] ], [ [[RDX_NEXT_EPIL]], %[[LOOP_EPIL]] ]
341+
; CHECK-NEXT: [[RES:%.*]] = phi float [ [[RES_PH]], %[[EXIT_UNR_LCSSA]] ], [ [[RDX_NEXT_EPIL]], %[[LOOP_EPIL]] ]
340342
; CHECK-NEXT: ret float [[RES]]
341343
;
342344
entry:
@@ -364,7 +366,7 @@ define float @test_fadd_no_reassoc(ptr %a, i64 %n) {
364366
; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[N]], -1
365367
; CHECK-NEXT: [[XTRAITER:%.*]] = and i64 [[N]], 1
366368
; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i64 [[TMP0]], 1
367-
; CHECK-NEXT: br i1 [[TMP1]], label %[[LOOP_EPIL_PREHEADER:.*]], label %[[ENTRY_NEW:.*]]
369+
; CHECK-NEXT: br i1 [[TMP1]], label %[[EXIT_UNR_LCSSA:.*]], label %[[ENTRY_NEW:.*]]
368370
; CHECK: [[ENTRY_NEW]]:
369371
; CHECK-NEXT: [[UNROLL_ITER:%.*]] = sub i64 [[N]], [[XTRAITER]]
370372
; CHECK-NEXT: br label %[[LOOP:.*]]
@@ -382,18 +384,19 @@ define float @test_fadd_no_reassoc(ptr %a, i64 %n) {
382384
; CHECK-NEXT: [[IV_NEXT_1]] = add nuw nsw i64 [[IV]], 2
383385
; CHECK-NEXT: [[NITER_NEXT_1]] = add i64 [[NITER]], 2
384386
; CHECK-NEXT: [[NITER_NCMP_1:%.*]] = icmp eq i64 [[NITER_NEXT_1]], [[UNROLL_ITER]]
385-
; CHECK-NEXT: br i1 [[NITER_NCMP_1]], label %[[EXIT_UNR_LCSSA:.*]], label %[[LOOP]], !llvm.loop [[LOOP7:![0-9]+]]
387+
; CHECK-NEXT: br i1 [[NITER_NCMP_1]], label %[[EXIT_UNR_LCSSA_LOOPEXIT:.*]], label %[[LOOP]], !llvm.loop [[LOOP7:![0-9]+]]
388+
; CHECK: [[EXIT_UNR_LCSSA_LOOPEXIT]]:
389+
; CHECK-NEXT: [[RES_PH_PH:%.*]] = phi float [ [[RDX_NEXT_1]], %[[LOOP]] ]
390+
; CHECK-NEXT: [[IV_UNR_PH:%.*]] = phi i64 [ [[IV_NEXT_1]], %[[LOOP]] ]
391+
; CHECK-NEXT: [[RDX_UNR_PH:%.*]] = phi float [ [[RDX_NEXT_1]], %[[LOOP]] ]
392+
; CHECK-NEXT: br label %[[EXIT_UNR_LCSSA]]
386393
; CHECK: [[EXIT_UNR_LCSSA]]:
387-
; CHECK-NEXT: [[RES_PH:%.*]] = phi float [ [[RDX_NEXT_1]], %[[LOOP]] ]
388-
; CHECK-NEXT: [[IV_UNR:%.*]] = phi i64 [ [[IV_NEXT_1]], %[[LOOP]] ]
389-
; CHECK-NEXT: [[RDX_UNR:%.*]] = phi float [ [[RDX_NEXT_1]], %[[LOOP]] ]
390-
; CHECK-NEXT: [[LCMP_MOD:%.*]] = icmp ne i64 [[XTRAITER]], 0
391-
; CHECK-NEXT: br i1 [[LCMP_MOD]], label %[[LOOP_EPIL_PREHEADER]], label %[[EXIT:.*]]
392-
; CHECK: [[LOOP_EPIL_PREHEADER]]:
393-
; CHECK-NEXT: [[IV_EPIL_INIT:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_UNR]], %[[EXIT_UNR_LCSSA]] ]
394-
; CHECK-NEXT: [[RDX_EPIL_INIT:%.*]] = phi float [ 0.000000e+00, %[[ENTRY]] ], [ [[RDX_UNR]], %[[EXIT_UNR_LCSSA]] ]
394+
; CHECK-NEXT: [[RES_PH:%.*]] = phi float [ poison, %[[ENTRY]] ], [ [[RES_PH_PH]], %[[EXIT_UNR_LCSSA_LOOPEXIT]] ]
395+
; CHECK-NEXT: [[IV_EPIL_INIT:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_UNR_PH]], %[[EXIT_UNR_LCSSA_LOOPEXIT]] ]
396+
; CHECK-NEXT: [[RDX_EPIL_INIT:%.*]] = phi float [ 0.000000e+00, %[[ENTRY]] ], [ [[RDX_UNR_PH]], %[[EXIT_UNR_LCSSA_LOOPEXIT]] ]
395397
; CHECK-NEXT: [[LCMP_MOD2:%.*]] = icmp ne i64 [[XTRAITER]], 0
396-
; CHECK-NEXT: call void @llvm.assume(i1 [[LCMP_MOD2]])
398+
; CHECK-NEXT: br i1 [[LCMP_MOD2]], label %[[LOOP_EPIL_PREHEADER:.*]], label %[[EXIT:.*]]
399+
; CHECK: [[LOOP_EPIL_PREHEADER]]:
397400
; CHECK-NEXT: br label %[[LOOP_EPIL:.*]]
398401
; CHECK: [[LOOP_EPIL]]:
399402
; CHECK-NEXT: [[GEP_A_EPIL:%.*]] = getelementptr inbounds nuw float, ptr [[A]], i64 [[IV_EPIL_INIT]]
@@ -429,7 +432,7 @@ define float @test_fadd_other_fastmath(ptr %a, i64 %n) {
429432
; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[N]], -1
430433
; CHECK-NEXT: [[XTRAITER:%.*]] = and i64 [[N]], 1
431434
; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i64 [[TMP0]], 1
432-
; CHECK-NEXT: br i1 [[TMP1]], label %[[LOOP_EPIL_PREHEADER:.*]], label %[[ENTRY_NEW:.*]]
435+
; CHECK-NEXT: br i1 [[TMP1]], label %[[EXIT_UNR_LCSSA:.*]], label %[[ENTRY_NEW:.*]]
433436
; CHECK: [[ENTRY_NEW]]:
434437
; CHECK-NEXT: [[UNROLL_ITER:%.*]] = sub i64 [[N]], [[XTRAITER]]
435438
; CHECK-NEXT: br label %[[LOOP:.*]]
@@ -447,18 +450,19 @@ define float @test_fadd_other_fastmath(ptr %a, i64 %n) {
447450
; CHECK-NEXT: [[IV_NEXT_1]] = add nuw nsw i64 [[IV]], 2
448451
; CHECK-NEXT: [[NITER_NEXT_1]] = add i64 [[NITER]], 2
449452
; CHECK-NEXT: [[NITER_NCMP_1:%.*]] = icmp eq i64 [[NITER_NEXT_1]], [[UNROLL_ITER]]
450-
; CHECK-NEXT: br i1 [[NITER_NCMP_1]], label %[[EXIT_UNR_LCSSA:.*]], label %[[LOOP]], !llvm.loop [[LOOP8:![0-9]+]]
453+
; CHECK-NEXT: br i1 [[NITER_NCMP_1]], label %[[EXIT_UNR_LCSSA_LOOPEXIT:.*]], label %[[LOOP]], !llvm.loop [[LOOP8:![0-9]+]]
454+
; CHECK: [[EXIT_UNR_LCSSA_LOOPEXIT]]:
455+
; CHECK-NEXT: [[RES_PH_PH:%.*]] = phi float [ [[RDX_NEXT_1]], %[[LOOP]] ]
456+
; CHECK-NEXT: [[IV_UNR_PH:%.*]] = phi i64 [ [[IV_NEXT_1]], %[[LOOP]] ]
457+
; CHECK-NEXT: [[RDX_UNR_PH:%.*]] = phi float [ [[RDX_NEXT_1]], %[[LOOP]] ]
458+
; CHECK-NEXT: br label %[[EXIT_UNR_LCSSA]]
451459
; CHECK: [[EXIT_UNR_LCSSA]]:
452-
; CHECK-NEXT: [[RES_PH:%.*]] = phi float [ [[RDX_NEXT_1]], %[[LOOP]] ]
453-
; CHECK-NEXT: [[IV_UNR:%.*]] = phi i64 [ [[IV_NEXT_1]], %[[LOOP]] ]
454-
; CHECK-NEXT: [[RDX_UNR:%.*]] = phi float [ [[RDX_NEXT_1]], %[[LOOP]] ]
455-
; CHECK-NEXT: [[LCMP_MOD:%.*]] = icmp ne i64 [[XTRAITER]], 0
456-
; CHECK-NEXT: br i1 [[LCMP_MOD]], label %[[LOOP_EPIL_PREHEADER]], label %[[EXIT:.*]]
457-
; CHECK: [[LOOP_EPIL_PREHEADER]]:
458-
; CHECK-NEXT: [[IV_EPIL_INIT:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_UNR]], %[[EXIT_UNR_LCSSA]] ]
459-
; CHECK-NEXT: [[RDX_EPIL_INIT:%.*]] = phi float [ 0.000000e+00, %[[ENTRY]] ], [ [[RDX_UNR]], %[[EXIT_UNR_LCSSA]] ]
460+
; CHECK-NEXT: [[RES_PH:%.*]] = phi float [ poison, %[[ENTRY]] ], [ [[RES_PH_PH]], %[[EXIT_UNR_LCSSA_LOOPEXIT]] ]
461+
; CHECK-NEXT: [[IV_EPIL_INIT:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_UNR_PH]], %[[EXIT_UNR_LCSSA_LOOPEXIT]] ]
462+
; CHECK-NEXT: [[RDX_EPIL_INIT:%.*]] = phi float [ 0.000000e+00, %[[ENTRY]] ], [ [[RDX_UNR_PH]], %[[EXIT_UNR_LCSSA_LOOPEXIT]] ]
460463
; CHECK-NEXT: [[LCMP_MOD2:%.*]] = icmp ne i64 [[XTRAITER]], 0
461-
; CHECK-NEXT: call void @llvm.assume(i1 [[LCMP_MOD2]])
464+
; CHECK-NEXT: br i1 [[LCMP_MOD2]], label %[[LOOP_EPIL_PREHEADER:.*]], label %[[EXIT:.*]]
465+
; CHECK: [[LOOP_EPIL_PREHEADER]]:
462466
; CHECK-NEXT: br label %[[LOOP_EPIL:.*]]
463467
; CHECK: [[LOOP_EPIL]]:
464468
; CHECK-NEXT: [[GEP_A_EPIL:%.*]] = getelementptr inbounds nuw float, ptr [[A]], i64 [[IV_EPIL_INIT]]

0 commit comments

Comments
 (0)