[flang][OpenMP] Add reduction clause support to loop directive
#128849
Changes from all commits. First, the `GenericLoopConversionPattern` pass:

```diff
@@ -15,6 +15,8 @@
 #include "mlir/Transforms/DialectConversion.h"
 
+#include <memory>
+#include <optional>
 #include <type_traits>
 
 namespace flangomp {
 #define GEN_PASS_DEF_GENERICLOOPCONVERSIONPASS
```
```diff
@@ -58,7 +60,7 @@ class GenericLoopConversionPattern
       if (teamsLoopCanBeParallelFor(loopOp))
         rewriteToDistributeParallelDo(loopOp, rewriter);
       else
-        rewriteToDistrbute(loopOp, rewriter);
+        rewriteToDistribute(loopOp, rewriter);
       break;
     }
```
```diff
@@ -77,9 +79,6 @@ class GenericLoopConversionPattern
     if (loopOp.getOrder())
       return todo("order");
 
-    if (!loopOp.getReductionVars().empty())
-      return todo("reduction");
-
     return mlir::success();
   }
```
```diff
@@ -168,7 +167,7 @@ class GenericLoopConversionPattern
     case ClauseBindKind::Parallel:
       return rewriteToWsloop(loopOp, rewriter);
     case ClauseBindKind::Teams:
-      return rewriteToDistrbute(loopOp, rewriter);
+      return rewriteToDistribute(loopOp, rewriter);
     case ClauseBindKind::Thread:
       return rewriteToSimdLoop(loopOp, rewriter);
     }
```
```diff
@@ -211,8 +210,9 @@ class GenericLoopConversionPattern
                                                           loopOp, rewriter);
   }
 
-  void rewriteToDistrbute(mlir::omp::LoopOp loopOp,
-                          mlir::ConversionPatternRewriter &rewriter) const {
+  void rewriteToDistribute(mlir::omp::LoopOp loopOp,
+                           mlir::ConversionPatternRewriter &rewriter) const {
+    assert(loopOp.getReductionVars().empty());
     rewriteToSingleWrapperOp<mlir::omp::DistributeOp,
                              mlir::omp::DistributeOperands>(loopOp, rewriter);
   }
```
```diff
@@ -246,6 +246,12 @@ class GenericLoopConversionPattern
     Fortran::common::openmp::EntryBlockArgs args;
     args.priv.vars = clauseOps.privateVars;
 
+    if constexpr (!std::is_same_v<OpOperandsTy,
+                                  mlir::omp::DistributeOperands>) {
+      populateReductionClauseOps(loopOp, clauseOps);
+      args.reduction.vars = clauseOps.reductionVars;
+    }
+
     auto wrapperOp = rewriter.create<OpTy>(loopOp.getLoc(), clauseOps);
     mlir::Block *opBlock = genEntryBlock(rewriter, args, wrapperOp.getRegion());
```
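`omp.distribute` does not take a reduction clause, so the shared wrapper builder populates reduction operands only for the other wrapper types. As a standalone illustration of the `if constexpr` technique used above (toy structs, not the pass's real clause-operand types), the discarded branch is never instantiated, so the excluded operand type does not even need the reduction members:

```cpp
#include <iostream>
#include <type_traits>

// Toy stand-ins for the pass's clause-operand structs (names hypothetical).
struct DistributeOperands {};
struct WsloopOperands { int reductionVars = 0; };

// Only the non-distribute instantiation touches reduction fields; the false
// branch of `if constexpr` is discarded at compile time, so DistributeOperands
// never needs a reductionVars member.
template <typename OpOperandsTy>
void populateReductions(OpOperandsTy &ops) {
  if constexpr (!std::is_same_v<OpOperandsTy, DistributeOperands>)
    ops.reductionVars = 42;
}

int main() {
  DistributeOperands distOps;
  WsloopOperands wsOps;
  populateReductions(distOps); // compiles: the reduction branch is discarded
  populateReductions(wsOps);   // sets the reduction field
  std::cout << wsOps.reductionVars << "\n"; // prints 42
}
```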
```diff
@@ -275,8 +281,7 @@ class GenericLoopConversionPattern
     auto parallelOp = rewriter.create<mlir::omp::ParallelOp>(loopOp.getLoc(),
                                                              parallelClauseOps);
-    mlir::Block *parallelBlock =
-        genEntryBlock(rewriter, parallelArgs, parallelOp.getRegion());
+    genEntryBlock(rewriter, parallelArgs, parallelOp.getRegion());
     parallelOp.setComposite(true);
     rewriter.setInsertionPoint(
         rewriter.create<mlir::omp::TerminatorOp>(loopOp.getLoc()));
```
```diff
@@ -288,20 +293,54 @@ class GenericLoopConversionPattern
     rewriter.createBlock(&distributeOp.getRegion());
 
     mlir::omp::WsloopOperands wsloopClauseOps;
+    populateReductionClauseOps(loopOp, wsloopClauseOps);
+    Fortran::common::openmp::EntryBlockArgs wsloopArgs;
+    wsloopArgs.reduction.vars = wsloopClauseOps.reductionVars;
 
     auto wsloopOp =
         rewriter.create<mlir::omp::WsloopOp>(loopOp.getLoc(), wsloopClauseOps);
     wsloopOp.setComposite(true);
-    rewriter.createBlock(&wsloopOp.getRegion());
+    genEntryBlock(rewriter, wsloopArgs, wsloopOp.getRegion());
 
     mlir::IRMapping mapper;
     mlir::Block &loopBlock = *loopOp.getRegion().begin();
 
-    for (auto [loopOpArg, parallelOpArg] : llvm::zip_equal(
-             loopBlock.getArguments(), parallelBlock->getArguments()))
+    auto loopBlockInterface =
+        llvm::cast<mlir::omp::BlockArgOpenMPOpInterface>(*loopOp);
+    auto parallelBlockInterface =
+        llvm::cast<mlir::omp::BlockArgOpenMPOpInterface>(*parallelOp);
+    auto wsloopBlockInterface =
+        llvm::cast<mlir::omp::BlockArgOpenMPOpInterface>(*wsloopOp);
+
+    for (auto [loopOpArg, parallelOpArg] :
+         llvm::zip_equal(loopBlockInterface.getPrivateBlockArgs(),
+                         parallelBlockInterface.getPrivateBlockArgs()))
       mapper.map(loopOpArg, parallelOpArg);
 
+    for (auto [loopOpArg, wsloopOpArg] :
+         llvm::zip_equal(loopBlockInterface.getReductionBlockArgs(),
+                         wsloopBlockInterface.getReductionBlockArgs()))
+      mapper.map(loopOpArg, wsloopOpArg);
+
     rewriter.clone(*loopOp.begin(), mapper);
   }
 
+  void
+  populateReductionClauseOps(mlir::omp::LoopOp loopOp,
+                             mlir::omp::ReductionClauseOps &clauseOps) const {
+    clauseOps.reductionMod = loopOp.getReductionModAttr();
+    clauseOps.reductionVars = loopOp.getReductionVars();
+
+    std::optional<mlir::ArrayAttr> reductionSyms = loopOp.getReductionSyms();
+    if (reductionSyms)
+      clauseOps.reductionSyms.assign(reductionSyms->begin(),
+                                     reductionSyms->end());
+
+    std::optional<llvm::ArrayRef<bool>> reductionByref =
+        loopOp.getReductionByref();
+    if (reductionByref)
+      clauseOps.reductionByref.assign(reductionByref->begin(),
+                                      reductionByref->end());
+  }
 };
 
 class GenericLoopConversionPass
```

Review thread on the block-argument mapping:

**Member:** Nit: please add a comment to explain why we can match the two argument lists like this. My concern is that, if at some point we introduce any other entry block arguments to the ops involved, this zip would silently pair up unrelated arguments. Actually, I think that casting these ops to their `BlockArgOpenMPOpInterface` and matching the per-clause argument groups would be more robust.

**Author:** Used the `BlockArgOpenMPOpInterface`.
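For readers unfamiliar with the interface, here is a minimal standalone sketch (not code from this PR; the helper name is hypothetical) of the approach the reviewer suggests. It assumes only that both operations implement `BlockArgOpenMPOpInterface`; matching arguments per clause group keeps the mapping valid even if additional kinds of entry-block arguments are introduced later:

```cpp
#include "mlir/Dialect/OpenMP/OpenMPDialect.h"
#include "mlir/IR/IRMapping.h"
#include "llvm/ADT/STLExtras.h"

// Sketch: map corresponding entry-block arguments from a source op to its
// replacement by clause group, instead of zipping over the raw argument list
// of the entry block. Assumes both ops implement BlockArgOpenMPOpInterface.
static void mapEntryBlockArgs(mlir::Operation *src, mlir::Operation *dst,
                              mlir::IRMapping &mapper) {
  auto srcIface = llvm::cast<mlir::omp::BlockArgOpenMPOpInterface>(src);
  auto dstIface = llvm::cast<mlir::omp::BlockArgOpenMPOpInterface>(dst);

  // Private copies form one contiguous group of entry block arguments...
  for (auto [srcArg, dstArg] : llvm::zip_equal(
           srcIface.getPrivateBlockArgs(), dstIface.getPrivateBlockArgs()))
    mapper.map(srcArg, dstArg);

  // ...and reduction accumulators another; matching group-by-group avoids
  // depending on the absolute positions of arguments in the block.
  for (auto [srcArg, dstArg] : llvm::zip_equal(
           srcIface.getReductionBlockArgs(), dstIface.getReductionBlockArgs()))
    mapper.map(srcArg, dstArg);
}
```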
Next, the Fortran lit tests:

```diff
@@ -75,7 +75,7 @@ subroutine test_order()
 subroutine test_reduction()
   integer :: i, dummy = 1
 
-  ! CHECK: omp.loop private(@{{.*}} %{{.*}}#0 -> %{{.*}} : !{{.*}}) reduction
+  ! CHECK: omp.simd private(@{{.*}} %{{.*}}#0 -> %{{.*}} : !{{.*}}) reduction
   ! CHECK-SAME: (@[[RED]] %{{.*}}#0 -> %[[DUMMY_ARG:.*]] : !{{.*}}) {
   ! CHECK-NEXT: omp.loop_nest (%{{.*}}) : i32 = (%{{.*}}) to (%{{.*}}) {{.*}} {
   ! CHECK: %[[DUMMY_DECL:.*]]:2 = hlfir.declare %[[DUMMY_ARG]] {uniq_name = "_QFtest_reductionEdummy"}
```
```diff
@@ -294,3 +294,46 @@ subroutine teams_loop_cannot_be_parallel_for_4
   !$omp end parallel
   END DO
 end subroutine
+
+! CHECK-LABEL: func.func @_QPloop_parallel_bind_reduction
+subroutine loop_parallel_bind_reduction
+  implicit none
+  integer :: x, i
+
+  ! CHECK: omp.wsloop
+  ! CHECK-SAME: private(@{{[^[:space:]]+}} %{{[^[:space:]]+}}#0 -> %[[PRIV_ARG:[^[:space:]]+]] : !fir.ref<i32>)
+  ! CHECK-SAME: reduction(@add_reduction_i32 %{{.*}}#0 -> %[[RED_ARG:.*]] : !fir.ref<i32>) {
+  ! CHECK-NEXT: omp.loop_nest {{.*}} {
+  ! CHECK-NEXT: hlfir.declare %[[PRIV_ARG]] {uniq_name = "_QF{{.*}}Ei"}
+  ! CHECK-NEXT: hlfir.declare %[[RED_ARG]] {uniq_name = "_QF{{.*}}Ex"}
+  ! CHECK: }
+  ! CHECK: }
+  !$omp loop bind(parallel) reduction(+: x)
+  do i = 0, 10
+    x = x + i
+  end do
+end subroutine
+
+! CHECK-LABEL: func.func @_QPloop_teams_loop_reduction
+subroutine loop_teams_loop_reduction
+  implicit none
+  integer :: x, i
+  ! CHECK: omp.teams {
+  ! CHECK: omp.parallel
+  ! CHECK-SAME: private(@{{[^[:space:]]+}} %{{[^[:space:]]+}}#0 -> %[[PRIV_ARG:[^[:space:]]+]] : !fir.ref<i32>) {
+  ! CHECK: omp.distribute {
+  ! CHECK: omp.wsloop
+  ! CHECK-SAME: reduction(@add_reduction_i32 %{{.*}}#0 -> %[[RED_ARG:.*]] : !fir.ref<i32>) {
+  ! CHECK-NEXT: omp.loop_nest {{.*}} {
+  ! CHECK-NEXT: hlfir.declare %[[PRIV_ARG]] {uniq_name = "_QF{{.*}}Ei"}
+  ! CHECK-NEXT: hlfir.declare %[[RED_ARG]] {uniq_name = "_QF{{.*}}Ex"}
+  ! CHECK: }
+  ! CHECK: }
+  ! CHECK: }
+  ! CHECK: }
+  ! CHECK: }
+  !$omp teams loop reduction(+: x)
+  do i = 0, 10
+    x = x + i
+  end do
+end subroutine
```

Review thread on the new tests:

**Member:** Can you add one of these reduction tests for the `teams loop` version of the directive as well?

**Author:** Done.
Review thread on the `assert` in `rewriteToDistribute`:

**Member:** I'm guessing the compound directive splitting in the frontend already makes sure that `!$omp teams loop reduction(...)` only adds the `reduction` clause to the `teams` leaf construct, so that this assert is not triggered. We probably want to make that check in the MLIR verifier, though, since we can't rely on Flang lowering being the only source of OpenMP dialect operations. If it's an `omp.loop` taking the implicit role of an `omp.distribute`, then we need to ensure it doesn't define a `reduction` clause.

We'd probably want to move the checks in `GenericLoopConversionPattern::rewriteStandaloneLoop`, `GenericLoopConversionPattern::matchAndRewrite` and related utility functions from here to an `extraClassDeclaration` of `mlir::omp::LoopOp` to do this properly. Something like `LoopOpRole mlir::omp::LoopOp::getLoopRole()`, where the possible values are `simd`, `wsloop`, `distribute` and `distribute parallel wsloop`. This can come as a follow-up PR, not something to be implemented here.

**Author:** We have a semantic check for this: #128823. We can look into the verifier improvement in a later PR; makes sense.
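To make the reviewer's proposal concrete, here is a rough sketch of the hypothetical `getLoopRole()` helper. The enum, the free-function form, and the `getBindKind()` accessor name are assumptions for illustration; the role derivation mirrors the dispatch logic the pattern currently uses (explicit `bind` clause first, then the parent construct):

```cpp
#include <optional>

#include "mlir/Dialect/OpenMP/OpenMPDialect.h"
#include "llvm/Support/Casting.h"

// Hypothetical helper sketched from the review discussion -- not part of this
// PR. Classifies the construct an omp.loop is expected to lower to, so a
// verifier could reject clauses that are invalid for that role (e.g. a
// reduction on a loop taking the implicit role of omp.distribute).
enum class LoopOpRole { Simd, Wsloop, Distribute, DistributeParallelWsloop };

static LoopOpRole getLoopRole(mlir::omp::LoopOp loopOp) {
  // An explicit bind clause fixes the role directly (accessor name assumed).
  if (std::optional<mlir::omp::ClauseBindKind> bind = loopOp.getBindKind()) {
    switch (*bind) {
    case mlir::omp::ClauseBindKind::Parallel:
      return LoopOpRole::Wsloop;
    case mlir::omp::ClauseBindKind::Teams:
      return LoopOpRole::Distribute;
    case mlir::omp::ClauseBindKind::Thread:
      return LoopOpRole::Simd;
    }
  }
  // Without bind, the role follows from the immediate parent construct. A
  // teams-parented loop may instead be plain distribute when it cannot be
  // parallelized (cf. teamsLoopCanBeParallelFor in the pass).
  mlir::Operation *parent = loopOp->getParentOp();
  if (llvm::isa_and_nonnull<mlir::omp::TeamsOp>(parent))
    return LoopOpRole::DistributeParallelWsloop;
  if (llvm::isa_and_nonnull<mlir::omp::ParallelOp>(parent))
    return LoopOpRole::Wsloop;
  return LoopOpRole::Simd; // standalone loop currently lowers to simd
}
```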