-
Notifications
You must be signed in to change notification settings - Fork 15.4k
[LoopUnroll] Introduce parallel accumulators when unrolling FP reductions. #166630
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 1 commit
214c03f
7284232
3bdcf3c
3e3db0b
9c401f3
da24236
739c5e1
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -1094,6 +1094,7 @@ llvm::UnrollLoop(Loop *L, UnrollLoopOptions ULO, LoopInfo *LI, | |
| if (!RdxResult) { | ||
| RdxResult = PartialReductions.front(); | ||
| IRBuilder Builder(ExitBlock, ExitBlock->getFirstNonPHIIt()); | ||
| Builder.setFastMathFlags(Reductions.begin()->second.getFastMathFlags()); | ||
| RecurKind RK = Reductions.begin()->second.getRecurrenceKind(); | ||
| for (Instruction *RdxPart : drop_begin(PartialReductions)) { | ||
| RdxResult = Builder.CreateBinOp( | ||
|
|
@@ -1256,14 +1257,19 @@ llvm::canParallelizeReductionWhenUnrolling(PHINode &Phi, Loop *L, | |
| return std::nullopt; | ||
| RecurKind RK = RdxDesc.getRecurrenceKind(); | ||
| // Skip unsupported reductions. | ||
| // TODO: Handle additional reductions, including FP and min-max | ||
| // reductions. | ||
| if (!RecurrenceDescriptor::isIntegerRecurrenceKind(RK) || | ||
| // TODO: Handle additional reductions, including min-max reductions. | ||
| if (!(RecurrenceDescriptor::isIntegerRecurrenceKind(RK) || | ||
| RecurrenceDescriptor::isFloatingPointRecurrenceKind(RK)) || | ||
| RecurrenceDescriptor::isAnyOfRecurrenceKind(RK) || | ||
| RecurrenceDescriptor::isFindIVRecurrenceKind(RK) || | ||
| RecurrenceDescriptor::isMinMaxRecurrenceKind(RK)) | ||
| return std::nullopt; | ||
|
|
||
| if (RecurrenceDescriptor::isFloatingPointRecurrenceKind(RK)) { | ||
| if (!RdxDesc.getFastMathFlags().allowReassoc()) | ||
| return std::nullopt; | ||
| } | ||
|
||
|
|
||
| if (RdxDesc.IntermediateStore) | ||
| return std::nullopt; | ||
|
|
||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -220,6 +220,137 @@ exit: | |
| ret i32 %res | ||
| } | ||
|
|
||
| define float @test_fadd_reduction(ptr %a, i64 %n) { | ||
| ; CHECK-LABEL: define float @test_fadd_reduction( | ||
| ; CHECK-SAME: ptr [[A:%.*]], i64 [[N:%.*]]) { | ||
| ; CHECK-NEXT: [[ENTRY:.*]]: | ||
| ; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[N]], -1 | ||
| ; CHECK-NEXT: [[XTRAITER:%.*]] = and i64 [[N]], 1 | ||
| ; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i64 [[TMP0]], 1 | ||
| ; CHECK-NEXT: br i1 [[TMP1]], label %[[LOOP_EPIL_PREHEADER:.*]], label %[[ENTRY_NEW:.*]] | ||
| ; CHECK: [[ENTRY_NEW]]: | ||
| ; CHECK-NEXT: [[UNROLL_ITER:%.*]] = sub i64 [[N]], [[XTRAITER]] | ||
| ; CHECK-NEXT: br label %[[LOOP:.*]] | ||
| ; CHECK: [[LOOP]]: | ||
| ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[ENTRY_NEW]] ], [ [[IV_NEXT_1:%.*]], %[[LOOP]] ] | ||
| ; CHECK-NEXT: [[RDX_1:%.*]] = phi float [ -0.000000e+00, %[[ENTRY_NEW]] ], [ [[RDX_NEXT_1:%.*]], %[[LOOP]] ] | ||
| ; CHECK-NEXT: [[RDX:%.*]] = phi float [ 0.000000e+00, %[[ENTRY_NEW]] ], [ [[RDX_NEXT:%.*]], %[[LOOP]] ] | ||
| ; CHECK-NEXT: [[NITER:%.*]] = phi i64 [ 0, %[[ENTRY_NEW]] ], [ [[NITER_NEXT_1:%.*]], %[[LOOP]] ] | ||
| ; CHECK-NEXT: [[GEP_A:%.*]] = getelementptr inbounds nuw float, ptr [[A]], i64 [[IV]] | ||
| ; CHECK-NEXT: [[TMP2:%.*]] = load float, ptr [[GEP_A]], align 16 | ||
| ; CHECK-NEXT: [[RDX_NEXT]] = fadd reassoc float [[RDX]], [[TMP2]] | ||
| ; CHECK-NEXT: [[IV_NEXT:%.*]] = add nuw nsw i64 [[IV]], 1 | ||
| ; CHECK-NEXT: [[GEP_A_1:%.*]] = getelementptr inbounds nuw float, ptr [[A]], i64 [[IV_NEXT]] | ||
| ; CHECK-NEXT: [[TMP3:%.*]] = load float, ptr [[GEP_A_1]], align 16 | ||
| ; CHECK-NEXT: [[RDX_NEXT_1]] = fadd reassoc float [[RDX_1]], [[TMP3]] | ||
| ; CHECK-NEXT: [[IV_NEXT_1]] = add nuw nsw i64 [[IV]], 2 | ||
| ; CHECK-NEXT: [[NITER_NEXT_1]] = add i64 [[NITER]], 2 | ||
| ; CHECK-NEXT: [[NITER_NCMP_1:%.*]] = icmp eq i64 [[NITER_NEXT_1]], [[UNROLL_ITER]] | ||
| ; CHECK-NEXT: br i1 [[NITER_NCMP_1]], label %[[EXIT_UNR_LCSSA:.*]], label %[[LOOP]], !llvm.loop [[LOOP5:![0-9]+]] | ||
| ; CHECK: [[EXIT_UNR_LCSSA]]: | ||
| ; CHECK-NEXT: [[RES_PH:%.*]] = phi float [ [[RDX_NEXT_1]], %[[LOOP]] ] | ||
| ; CHECK-NEXT: [[IV_UNR:%.*]] = phi i64 [ [[IV_NEXT_1]], %[[LOOP]] ] | ||
| ; CHECK-NEXT: [[RDX_UNR:%.*]] = phi float [ [[RDX_NEXT_1]], %[[LOOP]] ] | ||
| ; CHECK-NEXT: [[BIN_RDX:%.*]] = fadd reassoc float [[RDX_NEXT_1]], [[RDX_NEXT]] | ||
| ; CHECK-NEXT: [[LCMP_MOD:%.*]] = icmp ne i64 [[XTRAITER]], 0 | ||
| ; CHECK-NEXT: br i1 [[LCMP_MOD]], label %[[LOOP_EPIL_PREHEADER]], label %[[EXIT:.*]] | ||
| ; CHECK: [[LOOP_EPIL_PREHEADER]]: | ||
| ; CHECK-NEXT: [[IV_EPIL_INIT:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_UNR]], %[[EXIT_UNR_LCSSA]] ] | ||
| ; CHECK-NEXT: [[RDX_EPIL_INIT:%.*]] = phi float [ 0.000000e+00, %[[ENTRY]] ], [ [[BIN_RDX]], %[[EXIT_UNR_LCSSA]] ] | ||
| ; CHECK-NEXT: [[LCMP_MOD2:%.*]] = icmp ne i64 [[XTRAITER]], 0 | ||
| ; CHECK-NEXT: call void @llvm.assume(i1 [[LCMP_MOD2]]) | ||
| ; CHECK-NEXT: br label %[[LOOP_EPIL:.*]] | ||
| ; CHECK: [[LOOP_EPIL]]: | ||
| ; CHECK-NEXT: [[GEP_A_EPIL:%.*]] = getelementptr inbounds nuw float, ptr [[A]], i64 [[IV_EPIL_INIT]] | ||
| ; CHECK-NEXT: [[TMP4:%.*]] = load float, ptr [[GEP_A_EPIL]], align 16 | ||
| ; CHECK-NEXT: [[RDX_NEXT_EPIL:%.*]] = fadd reassoc float [[RDX_EPIL_INIT]], [[TMP4]] | ||
| ; CHECK-NEXT: br label %[[EXIT]] | ||
| ; CHECK: [[EXIT]]: | ||
| ; CHECK-NEXT: [[RES:%.*]] = phi float [ [[BIN_RDX]], %[[EXIT_UNR_LCSSA]] ], [ [[RDX_NEXT_EPIL]], %[[LOOP_EPIL]] ] | ||
| ; CHECK-NEXT: ret float [[RES]] | ||
| ; | ||
| entry: | ||
| br label %loop | ||
|
|
||
| loop: | ||
| %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] | ||
| %rdx = phi float [ 0.0, %entry ], [ %rdx.next, %loop ] | ||
| %gep.a = getelementptr inbounds nuw float, ptr %a, i64 %iv | ||
| %1 = load float, ptr %gep.a, align 16 | ||
| %rdx.next = fadd reassoc float %rdx, %1 | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Could you also add a test with a different fast-math flag, to make sure we don't introduce additional accumulators in that case?
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. done, thanks! |
||
| %iv.next = add nuw nsw i64 %iv, 1 | ||
| %ec = icmp eq i64 %iv.next, %n | ||
| br i1 %ec, label %exit, label %loop, !llvm.loop !0 | ||
|
|
||
| exit: | ||
| %res = phi float [ %rdx.next, %loop ] | ||
| ret float %res | ||
| } | ||
|
|
||
| define float @test_fadd_no_reassoc(ptr %a, i64 %n) { | ||
| ; CHECK-LABEL: define float @test_fadd_no_reassoc( | ||
| ; CHECK-SAME: ptr [[A:%.*]], i64 [[N:%.*]]) { | ||
| ; CHECK-NEXT: [[ENTRY:.*]]: | ||
| ; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[N]], -1 | ||
| ; CHECK-NEXT: [[XTRAITER:%.*]] = and i64 [[N]], 1 | ||
| ; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i64 [[TMP0]], 1 | ||
| ; CHECK-NEXT: br i1 [[TMP1]], label %[[LOOP_EPIL_PREHEADER:.*]], label %[[ENTRY_NEW:.*]] | ||
| ; CHECK: [[ENTRY_NEW]]: | ||
| ; CHECK-NEXT: [[UNROLL_ITER:%.*]] = sub i64 [[N]], [[XTRAITER]] | ||
| ; CHECK-NEXT: br label %[[LOOP:.*]] | ||
| ; CHECK: [[LOOP]]: | ||
| ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[ENTRY_NEW]] ], [ [[IV_NEXT_1:%.*]], %[[LOOP]] ] | ||
| ; CHECK-NEXT: [[RDX:%.*]] = phi float [ 0.000000e+00, %[[ENTRY_NEW]] ], [ [[RDX_NEXT_1:%.*]], %[[LOOP]] ] | ||
| ; CHECK-NEXT: [[NITER:%.*]] = phi i64 [ 0, %[[ENTRY_NEW]] ], [ [[NITER_NEXT_1:%.*]], %[[LOOP]] ] | ||
| ; CHECK-NEXT: [[GEP_A:%.*]] = getelementptr inbounds nuw float, ptr [[A]], i64 [[IV]] | ||
| ; CHECK-NEXT: [[TMP2:%.*]] = load float, ptr [[GEP_A]], align 16 | ||
| ; CHECK-NEXT: [[RDX_NEXT:%.*]] = fadd float [[RDX]], [[TMP2]] | ||
| ; CHECK-NEXT: [[IV_NEXT:%.*]] = add nuw nsw i64 [[IV]], 1 | ||
| ; CHECK-NEXT: [[GEP_A_1:%.*]] = getelementptr inbounds nuw float, ptr [[A]], i64 [[IV_NEXT]] | ||
| ; CHECK-NEXT: [[TMP3:%.*]] = load float, ptr [[GEP_A_1]], align 16 | ||
| ; CHECK-NEXT: [[RDX_NEXT_1]] = fadd float [[RDX_NEXT]], [[TMP3]] | ||
| ; CHECK-NEXT: [[IV_NEXT_1]] = add nuw nsw i64 [[IV]], 2 | ||
| ; CHECK-NEXT: [[NITER_NEXT_1]] = add i64 [[NITER]], 2 | ||
| ; CHECK-NEXT: [[NITER_NCMP_1:%.*]] = icmp eq i64 [[NITER_NEXT_1]], [[UNROLL_ITER]] | ||
| ; CHECK-NEXT: br i1 [[NITER_NCMP_1]], label %[[EXIT_UNR_LCSSA:.*]], label %[[LOOP]], !llvm.loop [[LOOP6:![0-9]+]] | ||
| ; CHECK: [[EXIT_UNR_LCSSA]]: | ||
| ; CHECK-NEXT: [[RES_PH:%.*]] = phi float [ [[RDX_NEXT_1]], %[[LOOP]] ] | ||
| ; CHECK-NEXT: [[IV_UNR:%.*]] = phi i64 [ [[IV_NEXT_1]], %[[LOOP]] ] | ||
| ; CHECK-NEXT: [[RDX_UNR:%.*]] = phi float [ [[RDX_NEXT_1]], %[[LOOP]] ] | ||
| ; CHECK-NEXT: [[LCMP_MOD:%.*]] = icmp ne i64 [[XTRAITER]], 0 | ||
| ; CHECK-NEXT: br i1 [[LCMP_MOD]], label %[[LOOP_EPIL_PREHEADER]], label %[[EXIT:.*]] | ||
| ; CHECK: [[LOOP_EPIL_PREHEADER]]: | ||
| ; CHECK-NEXT: [[IV_EPIL_INIT:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_UNR]], %[[EXIT_UNR_LCSSA]] ] | ||
| ; CHECK-NEXT: [[RDX_EPIL_INIT:%.*]] = phi float [ 0.000000e+00, %[[ENTRY]] ], [ [[RDX_UNR]], %[[EXIT_UNR_LCSSA]] ] | ||
| ; CHECK-NEXT: [[LCMP_MOD2:%.*]] = icmp ne i64 [[XTRAITER]], 0 | ||
| ; CHECK-NEXT: call void @llvm.assume(i1 [[LCMP_MOD2]]) | ||
| ; CHECK-NEXT: br label %[[LOOP_EPIL:.*]] | ||
| ; CHECK: [[LOOP_EPIL]]: | ||
| ; CHECK-NEXT: [[GEP_A_EPIL:%.*]] = getelementptr inbounds nuw float, ptr [[A]], i64 [[IV_EPIL_INIT]] | ||
| ; CHECK-NEXT: [[TMP4:%.*]] = load float, ptr [[GEP_A_EPIL]], align 16 | ||
| ; CHECK-NEXT: [[RDX_NEXT_EPIL:%.*]] = fadd float [[RDX_EPIL_INIT]], [[TMP4]] | ||
| ; CHECK-NEXT: br label %[[EXIT]] | ||
| ; CHECK: [[EXIT]]: | ||
| ; CHECK-NEXT: [[RES:%.*]] = phi float [ [[RES_PH]], %[[EXIT_UNR_LCSSA]] ], [ [[RDX_NEXT_EPIL]], %[[LOOP_EPIL]] ] | ||
| ; CHECK-NEXT: ret float [[RES]] | ||
| ; | ||
| entry: | ||
| br label %loop | ||
|
|
||
| loop: | ||
| %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] | ||
| %rdx = phi float [ 0.0, %entry ], [ %rdx.next, %loop ] | ||
| %gep.a = getelementptr inbounds nuw float, ptr %a, i64 %iv | ||
| %1 = load float, ptr %gep.a, align 16 | ||
| %rdx.next = fadd float %rdx, %1 | ||
| %iv.next = add nuw nsw i64 %iv, 1 | ||
| %ec = icmp eq i64 %iv.next, %n | ||
| br i1 %ec, label %exit, label %loop, !llvm.loop !0 | ||
|
|
||
| exit: | ||
| %res = phi float [ %rdx.next, %loop ] | ||
| ret float %res | ||
| } | ||
|
|
||
|
|
||
| !0 = distinct !{!0, !1} | ||
|
|
@@ -234,4 +365,6 @@ exit: | |
| ; CHECK: [[LOOP2]] = distinct !{[[LOOP2]], [[META1]]} | ||
| ; CHECK: [[LOOP3]] = distinct !{[[LOOP3]], [[META1]]} | ||
| ; CHECK: [[LOOP4]] = distinct !{[[LOOP4]], [[META1]]} | ||
| ; CHECK: [[LOOP5]] = distinct !{[[LOOP5]], [[META1]]} | ||
| ; CHECK: [[LOOP6]] = distinct !{[[LOOP6]], [[META1]]} | ||
| ;. | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think the condition is always true, given how `isFloatingPointRecurrenceKind` is implemented, as long as RK != None, which `RecurrenceDescriptor::isReductionPHI` should already ensure.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Makes sense, thanks -- done!