-
Notifications
You must be signed in to change notification settings - Fork 15.3k
[flang][OpenMP] Add reduction clause support to loop directive
#128849
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
|
@llvm/pr-subscribers-flang-openmp Author: Kareem Ergawy (ergawy) Changes: Extends `loop` directive transformation by adding support for the `reduction` clause. Full diff: https://github.com/llvm/llvm-project/pull/128849.diff 3 Files Affected:
diff --git a/flang/lib/Optimizer/OpenMP/GenericLoopConversion.cpp b/flang/lib/Optimizer/OpenMP/GenericLoopConversion.cpp
index bf94166edc079..7a9f65982e69c 100644
--- a/flang/lib/Optimizer/OpenMP/GenericLoopConversion.cpp
+++ b/flang/lib/Optimizer/OpenMP/GenericLoopConversion.cpp
@@ -15,6 +15,8 @@
#include "mlir/Transforms/DialectConversion.h"
#include <memory>
+#include <optional>
+#include <type_traits>
namespace flangomp {
#define GEN_PASS_DEF_GENERICLOOPCONVERSIONPASS
@@ -77,9 +79,6 @@ class GenericLoopConversionPattern
if (loopOp.getOrder())
return todo("order");
- if (!loopOp.getReductionVars().empty())
- return todo("reduction");
-
return mlir::success();
}
@@ -213,6 +212,7 @@ class GenericLoopConversionPattern
void rewriteToDistrbute(mlir::omp::LoopOp loopOp,
mlir::ConversionPatternRewriter &rewriter) const {
+ assert(loopOp.getReductionVars().empty());
rewriteToSingleWrapperOp<mlir::omp::DistributeOp,
mlir::omp::DistributeOperands>(loopOp, rewriter);
}
@@ -246,6 +246,12 @@ class GenericLoopConversionPattern
Fortran::common::openmp::EntryBlockArgs args;
args.priv.vars = clauseOps.privateVars;
+ if constexpr (!std::is_same_v<OpOperandsTy,
+ mlir::omp::DistributeOperands>) {
+ populateReductionClauseOps(loopOp, clauseOps);
+ args.reduction.vars = clauseOps.reductionVars;
+ }
+
auto wrapperOp = rewriter.create<OpTy>(loopOp.getLoc(), clauseOps);
mlir::Block *opBlock = genEntryBlock(rewriter, args, wrapperOp.getRegion());
@@ -288,20 +294,51 @@ class GenericLoopConversionPattern
rewriter.createBlock(&distributeOp.getRegion());
mlir::omp::WsloopOperands wsloopClauseOps;
+ populateReductionClauseOps(loopOp, wsloopClauseOps);
+ Fortran::common::openmp::EntryBlockArgs wsloopArgs;
+ wsloopArgs.reduction.vars = wsloopClauseOps.reductionVars;
+
auto wsloopOp =
rewriter.create<mlir::omp::WsloopOp>(loopOp.getLoc(), wsloopClauseOps);
wsloopOp.setComposite(true);
- rewriter.createBlock(&wsloopOp.getRegion());
+ mlir::Block *loopBlock =
+ genEntryBlock(rewriter, wsloopArgs, wsloopOp.getRegion());
mlir::IRMapping mapper;
- mlir::Block &loopBlock = *loopOp.getRegion().begin();
- for (auto [loopOpArg, parallelOpArg] : llvm::zip_equal(
- loopBlock.getArguments(), parallelBlock->getArguments()))
+ auto loopBlockInterface =
+ llvm::cast<mlir::omp::BlockArgOpenMPOpInterface>(*loopOp);
+
+ for (auto [loopOpArg, parallelOpArg] :
+ llvm::zip_equal(loopBlockInterface.getPrivateBlockArgs(),
+ parallelBlock->getArguments()))
mapper.map(loopOpArg, parallelOpArg);
+ for (auto [loopOpArg, wsloopOpArg] :
+ llvm::zip_equal(loopBlockInterface.getReductionBlockArgs(),
+ loopBlock->getArguments()))
+ mapper.map(loopOpArg, wsloopOpArg);
+
rewriter.clone(*loopOp.begin(), mapper);
}
+
+ template <typename OpOperandsTy>
+ void populateReductionClauseOps(mlir::omp::LoopOp loopOp,
+ OpOperandsTy &clauseOps) const {
+ clauseOps.reductionMod = loopOp.getReductionModAttr();
+ clauseOps.reductionVars = loopOp.getReductionVars();
+
+ std::optional<mlir::ArrayAttr> reductionSyms = loopOp.getReductionSyms();
+ if (reductionSyms)
+ clauseOps.reductionSyms.assign(reductionSyms->begin(),
+ reductionSyms->end());
+
+ std::optional<llvm::ArrayRef<bool>> reductionByref =
+ loopOp.getReductionByref();
+ if (reductionByref)
+ clauseOps.reductionByref.assign(reductionByref->begin(),
+ reductionByref->end());
+ }
};
class GenericLoopConversionPass
diff --git a/flang/test/Lower/OpenMP/loop-directive.f90 b/flang/test/Lower/OpenMP/loop-directive.f90
index ffa4a6ff24f24..7ee23ac5f5679 100644
--- a/flang/test/Lower/OpenMP/loop-directive.f90
+++ b/flang/test/Lower/OpenMP/loop-directive.f90
@@ -75,7 +75,7 @@ subroutine test_order()
subroutine test_reduction()
integer :: i, dummy = 1
- ! CHECK: omp.loop private(@{{.*}} %{{.*}}#0 -> %{{.*}} : !{{.*}}) reduction
+ ! CHECK: omp.simd private(@{{.*}} %{{.*}}#0 -> %{{.*}} : !{{.*}}) reduction
! CHECK-SAME: (@[[RED]] %{{.*}}#0 -> %[[DUMMY_ARG:.*]] : !{{.*}}) {
! CHECK-NEXT: omp.loop_nest (%{{.*}}) : i32 = (%{{.*}}) to (%{{.*}}) {{.*}} {
! CHECK: %[[DUMMY_DECL:.*]]:2 = hlfir.declare %[[DUMMY_ARG]] {uniq_name = "_QFtest_reductionEdummy"}
@@ -294,3 +294,39 @@ subroutine teams_loop_cannot_be_parallel_for_4
!$omp end parallel
END DO
end subroutine
+
+! CHECK-LABEL: func.func @_QPloop_parallel_bind_reduction
+subroutine loop_parallel_bind_reduction
+ implicit none
+ integer :: x, i
+
+ ! CHECK: omp.wsloop
+ ! CHECK-SAME: private(@{{[^[:space:]]+}} %{{[^[:space:]]+}}#0 -> %[[PRIV_ARG:[^[:space:]]+]] : !fir.ref<i32>)
+ ! CHECK-SAME: reduction(@add_reduction_i32 %{{.*}}#0 -> %[[RED_ARG:.*]] : !fir.ref<i32>) {
+ ! CHECK-NEXT: omp.loop_nest {{.*}} {
+ ! CHECK-NEXT: hlfir.declare %[[PRIV_ARG]] {uniq_name = "_QF{{.*}}Ei"}
+ ! CHECK-NEXT: hlfir.declare %[[RED_ARG]] {uniq_name = "_QF{{.*}}Ex"}
+ ! CHECK: }
+ ! CHECK: }
+ !$omp loop bind(parallel) reduction(+: x)
+ do i = 0, 10
+ x = x + i
+ end do
+end subroutine
+
+! CHECK-LABEL: func.func @_QPteams_loop_reduction
+subroutine teams_loop_reduction
+ implicit none
+ integer :: x, i
+
+ ! CHECK: omp.wsloop
+ ! CHECK-SAME: reduction(@add_reduction_i32 %{{.*}}#0 -> %[[RED_ARG:.*]] : !fir.ref<i32>) {
+ ! CHECK-NEXT: omp.loop_nest {{.*}} {
+ ! CHECK: hlfir.declare %[[RED_ARG]] {uniq_name = "_QF{{.*}}Ex"}
+ ! CHECK: }
+ ! CHECK: }
+ !$omp loop bind(parallel) reduction(+: x)
+ do i = 0, 10
+ x = x + i
+ end do
+end subroutine
diff --git a/flang/test/Transforms/generic-loop-rewriting-todo.mlir b/flang/test/Transforms/generic-loop-rewriting-todo.mlir
index e992296c9a837..64094d61eb9a3 100644
--- a/flang/test/Transforms/generic-loop-rewriting-todo.mlir
+++ b/flang/test/Transforms/generic-loop-rewriting-todo.mlir
@@ -1,24 +1,12 @@
// RUN: fir-opt --omp-generic-loop-conversion -verify-diagnostics %s
-
-omp.declare_reduction @add_reduction_i32 : i32 init {
- ^bb0(%arg0: i32):
- %c0_i32 = arith.constant 0 : i32
- omp.yield(%c0_i32 : i32)
- } combiner {
- ^bb0(%arg0: i32, %arg1: i32):
- %0 = arith.addi %arg0, %arg1 : i32
- omp.yield(%0 : i32)
- }
-
func.func @_QPloop_order() {
omp.teams {
%c0 = arith.constant 0 : i32
%c10 = arith.constant 10 : i32
%c1 = arith.constant 1 : i32
- %sum = fir.alloca i32 {bindc_name = "i", uniq_name = "_QFtest_orderEi"}
- // expected-error@below {{not yet implemented: Unhandled clause reduction in omp.loop operation}}
- omp.loop reduction(@add_reduction_i32 %sum -> %arg2 : !fir.ref<i32>) {
+ // expected-error@below {{not yet implemented: Unhandled clause order in omp.loop operation}}
+ omp.loop order(reproducible:concurrent) {
omp.loop_nest (%arg3) : i32 = (%c0) to (%c10) inclusive step (%c1) {
omp.yield
}
|
|
@llvm/pr-subscribers-flang-fir-hlfir Author: Kareem Ergawy (ergawy) Changes: Extends `loop` directive transformation by adding support for the `reduction` clause. Full diff: https://github.com/llvm/llvm-project/pull/128849.diff 3 Files Affected:
diff --git a/flang/lib/Optimizer/OpenMP/GenericLoopConversion.cpp b/flang/lib/Optimizer/OpenMP/GenericLoopConversion.cpp
index bf94166edc079..7a9f65982e69c 100644
--- a/flang/lib/Optimizer/OpenMP/GenericLoopConversion.cpp
+++ b/flang/lib/Optimizer/OpenMP/GenericLoopConversion.cpp
@@ -15,6 +15,8 @@
#include "mlir/Transforms/DialectConversion.h"
#include <memory>
+#include <optional>
+#include <type_traits>
namespace flangomp {
#define GEN_PASS_DEF_GENERICLOOPCONVERSIONPASS
@@ -77,9 +79,6 @@ class GenericLoopConversionPattern
if (loopOp.getOrder())
return todo("order");
- if (!loopOp.getReductionVars().empty())
- return todo("reduction");
-
return mlir::success();
}
@@ -213,6 +212,7 @@ class GenericLoopConversionPattern
void rewriteToDistrbute(mlir::omp::LoopOp loopOp,
mlir::ConversionPatternRewriter &rewriter) const {
+ assert(loopOp.getReductionVars().empty());
rewriteToSingleWrapperOp<mlir::omp::DistributeOp,
mlir::omp::DistributeOperands>(loopOp, rewriter);
}
@@ -246,6 +246,12 @@ class GenericLoopConversionPattern
Fortran::common::openmp::EntryBlockArgs args;
args.priv.vars = clauseOps.privateVars;
+ if constexpr (!std::is_same_v<OpOperandsTy,
+ mlir::omp::DistributeOperands>) {
+ populateReductionClauseOps(loopOp, clauseOps);
+ args.reduction.vars = clauseOps.reductionVars;
+ }
+
auto wrapperOp = rewriter.create<OpTy>(loopOp.getLoc(), clauseOps);
mlir::Block *opBlock = genEntryBlock(rewriter, args, wrapperOp.getRegion());
@@ -288,20 +294,51 @@ class GenericLoopConversionPattern
rewriter.createBlock(&distributeOp.getRegion());
mlir::omp::WsloopOperands wsloopClauseOps;
+ populateReductionClauseOps(loopOp, wsloopClauseOps);
+ Fortran::common::openmp::EntryBlockArgs wsloopArgs;
+ wsloopArgs.reduction.vars = wsloopClauseOps.reductionVars;
+
auto wsloopOp =
rewriter.create<mlir::omp::WsloopOp>(loopOp.getLoc(), wsloopClauseOps);
wsloopOp.setComposite(true);
- rewriter.createBlock(&wsloopOp.getRegion());
+ mlir::Block *loopBlock =
+ genEntryBlock(rewriter, wsloopArgs, wsloopOp.getRegion());
mlir::IRMapping mapper;
- mlir::Block &loopBlock = *loopOp.getRegion().begin();
- for (auto [loopOpArg, parallelOpArg] : llvm::zip_equal(
- loopBlock.getArguments(), parallelBlock->getArguments()))
+ auto loopBlockInterface =
+ llvm::cast<mlir::omp::BlockArgOpenMPOpInterface>(*loopOp);
+
+ for (auto [loopOpArg, parallelOpArg] :
+ llvm::zip_equal(loopBlockInterface.getPrivateBlockArgs(),
+ parallelBlock->getArguments()))
mapper.map(loopOpArg, parallelOpArg);
+ for (auto [loopOpArg, wsloopOpArg] :
+ llvm::zip_equal(loopBlockInterface.getReductionBlockArgs(),
+ loopBlock->getArguments()))
+ mapper.map(loopOpArg, wsloopOpArg);
+
rewriter.clone(*loopOp.begin(), mapper);
}
+
+ template <typename OpOperandsTy>
+ void populateReductionClauseOps(mlir::omp::LoopOp loopOp,
+ OpOperandsTy &clauseOps) const {
+ clauseOps.reductionMod = loopOp.getReductionModAttr();
+ clauseOps.reductionVars = loopOp.getReductionVars();
+
+ std::optional<mlir::ArrayAttr> reductionSyms = loopOp.getReductionSyms();
+ if (reductionSyms)
+ clauseOps.reductionSyms.assign(reductionSyms->begin(),
+ reductionSyms->end());
+
+ std::optional<llvm::ArrayRef<bool>> reductionByref =
+ loopOp.getReductionByref();
+ if (reductionByref)
+ clauseOps.reductionByref.assign(reductionByref->begin(),
+ reductionByref->end());
+ }
};
class GenericLoopConversionPass
diff --git a/flang/test/Lower/OpenMP/loop-directive.f90 b/flang/test/Lower/OpenMP/loop-directive.f90
index ffa4a6ff24f24..7ee23ac5f5679 100644
--- a/flang/test/Lower/OpenMP/loop-directive.f90
+++ b/flang/test/Lower/OpenMP/loop-directive.f90
@@ -75,7 +75,7 @@ subroutine test_order()
subroutine test_reduction()
integer :: i, dummy = 1
- ! CHECK: omp.loop private(@{{.*}} %{{.*}}#0 -> %{{.*}} : !{{.*}}) reduction
+ ! CHECK: omp.simd private(@{{.*}} %{{.*}}#0 -> %{{.*}} : !{{.*}}) reduction
! CHECK-SAME: (@[[RED]] %{{.*}}#0 -> %[[DUMMY_ARG:.*]] : !{{.*}}) {
! CHECK-NEXT: omp.loop_nest (%{{.*}}) : i32 = (%{{.*}}) to (%{{.*}}) {{.*}} {
! CHECK: %[[DUMMY_DECL:.*]]:2 = hlfir.declare %[[DUMMY_ARG]] {uniq_name = "_QFtest_reductionEdummy"}
@@ -294,3 +294,39 @@ subroutine teams_loop_cannot_be_parallel_for_4
!$omp end parallel
END DO
end subroutine
+
+! CHECK-LABEL: func.func @_QPloop_parallel_bind_reduction
+subroutine loop_parallel_bind_reduction
+ implicit none
+ integer :: x, i
+
+ ! CHECK: omp.wsloop
+ ! CHECK-SAME: private(@{{[^[:space:]]+}} %{{[^[:space:]]+}}#0 -> %[[PRIV_ARG:[^[:space:]]+]] : !fir.ref<i32>)
+ ! CHECK-SAME: reduction(@add_reduction_i32 %{{.*}}#0 -> %[[RED_ARG:.*]] : !fir.ref<i32>) {
+ ! CHECK-NEXT: omp.loop_nest {{.*}} {
+ ! CHECK-NEXT: hlfir.declare %[[PRIV_ARG]] {uniq_name = "_QF{{.*}}Ei"}
+ ! CHECK-NEXT: hlfir.declare %[[RED_ARG]] {uniq_name = "_QF{{.*}}Ex"}
+ ! CHECK: }
+ ! CHECK: }
+ !$omp loop bind(parallel) reduction(+: x)
+ do i = 0, 10
+ x = x + i
+ end do
+end subroutine
+
+! CHECK-LABEL: func.func @_QPteams_loop_reduction
+subroutine teams_loop_reduction
+ implicit none
+ integer :: x, i
+
+ ! CHECK: omp.wsloop
+ ! CHECK-SAME: reduction(@add_reduction_i32 %{{.*}}#0 -> %[[RED_ARG:.*]] : !fir.ref<i32>) {
+ ! CHECK-NEXT: omp.loop_nest {{.*}} {
+ ! CHECK: hlfir.declare %[[RED_ARG]] {uniq_name = "_QF{{.*}}Ex"}
+ ! CHECK: }
+ ! CHECK: }
+ !$omp loop bind(parallel) reduction(+: x)
+ do i = 0, 10
+ x = x + i
+ end do
+end subroutine
diff --git a/flang/test/Transforms/generic-loop-rewriting-todo.mlir b/flang/test/Transforms/generic-loop-rewriting-todo.mlir
index e992296c9a837..64094d61eb9a3 100644
--- a/flang/test/Transforms/generic-loop-rewriting-todo.mlir
+++ b/flang/test/Transforms/generic-loop-rewriting-todo.mlir
@@ -1,24 +1,12 @@
// RUN: fir-opt --omp-generic-loop-conversion -verify-diagnostics %s
-
-omp.declare_reduction @add_reduction_i32 : i32 init {
- ^bb0(%arg0: i32):
- %c0_i32 = arith.constant 0 : i32
- omp.yield(%c0_i32 : i32)
- } combiner {
- ^bb0(%arg0: i32, %arg1: i32):
- %0 = arith.addi %arg0, %arg1 : i32
- omp.yield(%0 : i32)
- }
-
func.func @_QPloop_order() {
omp.teams {
%c0 = arith.constant 0 : i32
%c10 = arith.constant 10 : i32
%c1 = arith.constant 1 : i32
- %sum = fir.alloca i32 {bindc_name = "i", uniq_name = "_QFtest_orderEi"}
- // expected-error@below {{not yet implemented: Unhandled clause reduction in omp.loop operation}}
- omp.loop reduction(@add_reduction_i32 %sum -> %arg2 : !fir.ref<i32>) {
+ // expected-error@below {{not yet implemented: Unhandled clause order in omp.loop operation}}
+ omp.loop order(reproducible:concurrent) {
omp.loop_nest (%arg3) : i32 = (%c0) to (%c10) inclusive step (%c1) {
omp.yield
}
|
8f2c57e to
5b88e7d
Compare
8ec979d to
20a2501
Compare
skatrak
left a comment
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Thank you Kareem, a couple comments from me.
|
|
||
| void rewriteToDistrbute(mlir::omp::LoopOp loopOp, | ||
| mlir::ConversionPatternRewriter &rewriter) const { | ||
| assert(loopOp.getReductionVars().empty()); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I'm guessing the compound directive splitting in the frontend already makes sure that !$omp teams loop reduction(...) only adds the reduction clause to the teams leaf construct, so that this assert is not triggered.
We probably want to make that check in the MLIR verifier, though, since we can't rely on Flang lowering being the only source of OpenMP dialect operations. If it's an omp.loop taking the implicit role of an omp.distribute, then we need to ensure it doesn't define a reduction clause.
We'd probably want to move the checks in GenericLoopConversionPattern::rewriteStandaloneLoop, GenericLoopConversionPattern::matchAndRewrite and related utility functions from here to an extraClassDeclaration of mlir::omp::LoopOp to do this properly. Something like LoopOpRole mlir::omp::LoopOp::getLoopRole(), where the possible values are simd, wsloop, distribute and distribute parallel wsloop.
This can come as a follow-up PR, not something to be implemented here.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
We have a semantic check for this: #128823. Can look into the verifier improvement in a later PR, makes sense.
| auto loopBlockInterface = | ||
| llvm::cast<mlir::omp::BlockArgOpenMPOpInterface>(*loopOp); | ||
|
|
||
| for (auto [loopOpArg, parallelOpArg] : |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Nit: Please add a comment to explain why we can match private entry block arguments to all of the entry block arguments of omp.parallel, and the same below with respect to reduction and omp.loop.
My concern is that, if at some point we introduce any other entry block arguments to the omp.parallel or omp.loop operations, this here will break and troubleshooting it won't be straightforward.
Actually, I think that casting these ops to their BlockArgOpenMPOpInterface and accessing their arguments via get{Private,Reduction}BlockArgs makes this much more resilient to future updates.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Used the BlockArgOpenMPOpInterface to access the relevant args.
20a2501 to
5539c20
Compare
skatrak
left a comment
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM, thanks!
| end subroutine | ||
|
|
||
| ! CHECK-LABEL: func.func @_QPloop_parallel_bind_reduction | ||
| subroutine loop_parallel_bind_reduction |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Can you add one of these reduction tests for the distribute parallel do case? That way we test the rewriteToDistributeParallelDo function.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Done.
Extends `loop` directive transformation by adding support for the `reduction` clause.
5539c20 to
d096245
Compare
…lvm#128849) Extends `loop` directive transformation by adding support for the `reduction` clause.
Extends `loop` directive transformation by adding support for the `reduction` clause.