Skip to content

Commit 739c5e1

Browse files
committed
fixup! [LoopUnroll] Introduce parallel accumulators when unrolling FP reductions.
1 parent da24236 commit 739c5e1

File tree

1 file changed

+68
-2
lines changed

1 file changed

+68
-2
lines changed

llvm/test/Transforms/LoopUnroll/runtime-unroll-reductions.ll

Lines changed: 68 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -313,7 +313,7 @@ define float @test_fadd_reduction(ptr %a, i64 %n) {
313313
; CHECK-NEXT: [[IV_NEXT_1]] = add nuw nsw i64 [[IV]], 2
314314
; CHECK-NEXT: [[NITER_NEXT_1]] = add i64 [[NITER]], 2
315315
; CHECK-NEXT: [[NITER_NCMP_1:%.*]] = icmp eq i64 [[NITER_NEXT_1]], [[UNROLL_ITER]]
316-
; CHECK-NEXT: br i1 [[NITER_NCMP_1]], label %[[EXIT_UNR_LCSSA:.*]], label %[[LOOP]], !llvm.loop [[LOOP5:![0-9]+]]
316+
; CHECK-NEXT: br i1 [[NITER_NCMP_1]], label %[[EXIT_UNR_LCSSA:.*]], label %[[LOOP]], !llvm.loop [[LOOP6:![0-9]+]]
317317
; CHECK: [[EXIT_UNR_LCSSA]]:
318318
; CHECK-NEXT: [[RES_PH:%.*]] = phi float [ [[RDX_NEXT_1]], %[[LOOP]] ]
319319
; CHECK-NEXT: [[IV_UNR:%.*]] = phi i64 [ [[IV_NEXT_1]], %[[LOOP]] ]
@@ -379,7 +379,7 @@ define float @test_fadd_no_reassoc(ptr %a, i64 %n) {
379379
; CHECK-NEXT: [[IV_NEXT_1]] = add nuw nsw i64 [[IV]], 2
380380
; CHECK-NEXT: [[NITER_NEXT_1]] = add i64 [[NITER]], 2
381381
; CHECK-NEXT: [[NITER_NCMP_1:%.*]] = icmp eq i64 [[NITER_NEXT_1]], [[UNROLL_ITER]]
382-
; CHECK-NEXT: br i1 [[NITER_NCMP_1]], label %[[EXIT_UNR_LCSSA:.*]], label %[[LOOP]], !llvm.loop [[LOOP6:![0-9]+]]
382+
; CHECK-NEXT: br i1 [[NITER_NCMP_1]], label %[[EXIT_UNR_LCSSA:.*]], label %[[LOOP]], !llvm.loop [[LOOP7:![0-9]+]]
383383
; CHECK: [[EXIT_UNR_LCSSA]]:
384384
; CHECK-NEXT: [[RES_PH:%.*]] = phi float [ [[RDX_NEXT_1]], %[[LOOP]] ]
385385
; CHECK-NEXT: [[IV_UNR:%.*]] = phi i64 [ [[IV_NEXT_1]], %[[LOOP]] ]
@@ -419,6 +419,70 @@ exit:
419419
ret float %res
420420
}
421421

422+
; Float sum reduction whose fadd carries only the `contract` fast-math flag
; (no `reassoc`). The loop requests an unroll count of 2 through !llvm.loop !0.
; The expected output below has a main loop with a single float accumulator
; phi and a serial chain RDX -> RDX_NEXT -> RDX_NEXT_1, plus a one-iteration
; epilogue for the odd trip-count remainder; no additional accumulators appear.
define float @test_fadd_other_fastmath(ptr %a, i64 %n) {
423+
; CHECK-LABEL: define float @test_fadd_other_fastmath(
424+
; CHECK-SAME: ptr [[A:%.*]], i64 [[N:%.*]]) {
425+
; CHECK-NEXT: [[ENTRY:.*]]:
426+
; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[N]], -1
427+
; CHECK-NEXT: [[XTRAITER:%.*]] = and i64 [[N]], 1
428+
; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i64 [[TMP0]], 1
429+
; CHECK-NEXT: br i1 [[TMP1]], label %[[LOOP_EPIL_PREHEADER:.*]], label %[[ENTRY_NEW:.*]]
430+
; CHECK: [[ENTRY_NEW]]:
431+
; CHECK-NEXT: [[UNROLL_ITER:%.*]] = sub i64 [[N]], [[XTRAITER]]
432+
; CHECK-NEXT: br label %[[LOOP:.*]]
433+
; CHECK: [[LOOP]]:
434+
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[ENTRY_NEW]] ], [ [[IV_NEXT_1:%.*]], %[[LOOP]] ]
435+
; CHECK-NEXT: [[RDX:%.*]] = phi float [ 0.000000e+00, %[[ENTRY_NEW]] ], [ [[RDX_NEXT_1:%.*]], %[[LOOP]] ]
436+
; CHECK-NEXT: [[NITER:%.*]] = phi i64 [ 0, %[[ENTRY_NEW]] ], [ [[NITER_NEXT_1:%.*]], %[[LOOP]] ]
437+
; CHECK-NEXT: [[GEP_A:%.*]] = getelementptr inbounds nuw float, ptr [[A]], i64 [[IV]]
438+
; CHECK-NEXT: [[TMP2:%.*]] = load float, ptr [[GEP_A]], align 16
439+
; CHECK-NEXT: [[RDX_NEXT:%.*]] = fadd contract float [[RDX]], [[TMP2]]
440+
; CHECK-NEXT: [[IV_NEXT:%.*]] = add nuw nsw i64 [[IV]], 1
441+
; CHECK-NEXT: [[GEP_A_1:%.*]] = getelementptr inbounds nuw float, ptr [[A]], i64 [[IV_NEXT]]
442+
; CHECK-NEXT: [[TMP3:%.*]] = load float, ptr [[GEP_A_1]], align 16
443+
; CHECK-NEXT: [[RDX_NEXT_1]] = fadd contract float [[RDX_NEXT]], [[TMP3]]
444+
; CHECK-NEXT: [[IV_NEXT_1]] = add nuw nsw i64 [[IV]], 2
445+
; CHECK-NEXT: [[NITER_NEXT_1]] = add i64 [[NITER]], 2
446+
; CHECK-NEXT: [[NITER_NCMP_1:%.*]] = icmp eq i64 [[NITER_NEXT_1]], [[UNROLL_ITER]]
447+
; CHECK-NEXT: br i1 [[NITER_NCMP_1]], label %[[EXIT_UNR_LCSSA:.*]], label %[[LOOP]], !llvm.loop [[LOOP8:![0-9]+]]
448+
; CHECK: [[EXIT_UNR_LCSSA]]:
449+
; CHECK-NEXT: [[RES_PH:%.*]] = phi float [ [[RDX_NEXT_1]], %[[LOOP]] ]
450+
; CHECK-NEXT: [[IV_UNR:%.*]] = phi i64 [ [[IV_NEXT_1]], %[[LOOP]] ]
451+
; CHECK-NEXT: [[RDX_UNR:%.*]] = phi float [ [[RDX_NEXT_1]], %[[LOOP]] ]
452+
; CHECK-NEXT: [[LCMP_MOD:%.*]] = icmp ne i64 [[XTRAITER]], 0
453+
; CHECK-NEXT: br i1 [[LCMP_MOD]], label %[[LOOP_EPIL_PREHEADER]], label %[[EXIT:.*]]
454+
; CHECK: [[LOOP_EPIL_PREHEADER]]:
455+
; CHECK-NEXT: [[IV_EPIL_INIT:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_UNR]], %[[EXIT_UNR_LCSSA]] ]
456+
; CHECK-NEXT: [[RDX_EPIL_INIT:%.*]] = phi float [ 0.000000e+00, %[[ENTRY]] ], [ [[RDX_UNR]], %[[EXIT_UNR_LCSSA]] ]
457+
; CHECK-NEXT: [[LCMP_MOD2:%.*]] = icmp ne i64 [[XTRAITER]], 0
458+
; CHECK-NEXT: call void @llvm.assume(i1 [[LCMP_MOD2]])
459+
; CHECK-NEXT: br label %[[LOOP_EPIL:.*]]
460+
; CHECK: [[LOOP_EPIL]]:
461+
; CHECK-NEXT: [[GEP_A_EPIL:%.*]] = getelementptr inbounds nuw float, ptr [[A]], i64 [[IV_EPIL_INIT]]
462+
; CHECK-NEXT: [[TMP4:%.*]] = load float, ptr [[GEP_A_EPIL]], align 16
463+
; CHECK-NEXT: [[RDX_NEXT_EPIL:%.*]] = fadd contract float [[RDX_EPIL_INIT]], [[TMP4]]
464+
; CHECK-NEXT: br label %[[EXIT]]
465+
; CHECK: [[EXIT]]:
466+
; CHECK-NEXT: [[RES:%.*]] = phi float [ [[RES_PH]], %[[EXIT_UNR_LCSSA]] ], [ [[RDX_NEXT_EPIL]], %[[LOOP_EPIL]] ]
467+
; CHECK-NEXT: ret float [[RES]]
468+
;
469+
entry:
470+
br label %loop
471+
472+
loop:
473+
%iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
474+
; Running sum, seeded with 0.0 and updated once per iteration.
%rdx = phi float [ 0.0, %entry ], [ %rdx.next, %loop ]
475+
%gep.a = getelementptr inbounds nuw float, ptr %a, i64 %iv
476+
%1 = load float, ptr %gep.a, align 16
477+
; The only fast-math flag here is `contract`.
; NOTE(review): presumably the absence of `reassoc` is why the expected output
; keeps one serial accumulator chain; confirm against the LoopUnroll change.
%rdx.next = fadd contract float %rdx, %1
478+
%iv.next = add nuw nsw i64 %iv, 1
479+
%ec = icmp eq i64 %iv.next, %n
480+
; !0 is the loop metadata node that carries llvm.loop.unroll.count = 2.
br i1 %ec, label %exit, label %loop, !llvm.loop !0
481+
482+
exit:
483+
; Single-incoming phi that carries the final sum out of the loop.
%res = phi float [ %rdx.next, %loop ]
484+
ret float %res
485+
}
422486

423487
; Loop metadata: !0 is a distinct, self-referential node attached to loop
; back-edges via !llvm.loop; !1 requests an unroll count of 2.
!0 = distinct !{!0, !1}
424488
!1 = !{!"llvm.loop.unroll.count", i32 2}
@@ -434,4 +498,6 @@ exit:
434498
; CHECK: [[LOOP4]] = distinct !{[[LOOP4]], [[META1]]}
435499
; CHECK: [[LOOP5]] = distinct !{[[LOOP5]], [[META1]]}
436500
; CHECK: [[LOOP6]] = distinct !{[[LOOP6]], [[META1]]}
501+
; CHECK: [[LOOP7]] = distinct !{[[LOOP7]], [[META1]]}
502+
; CHECK: [[LOOP8]] = distinct !{[[LOOP8]], [[META1]]}
437503
;.

0 commit comments

Comments
 (0)