-
Notifications
You must be signed in to change notification settings - Fork 15.4k
[flang][acc] remap symbol appearing in reduction clause #168876
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
|
@llvm/pr-subscribers-flang-fir-hlfir @llvm/pr-subscribers-openacc Author: None (jeanPerier) Changes: This patch is a follow-up to #162306 for the reduction clause. Inside the compute region that carries the reduction clause, a new hlfir.declare is generated for each symbol appearing in the reduction clause. The input of this hlfir.declare is the acc.reduction result. The related semantics::Symbol is remapped to the hlfir.declare result so that any reference to the symbol inside the compute region will use this SSA value as the starting point instead of the SSA value for the host address. Full diff: https://github.com/llvm/llvm-project/pull/168876.diff 2 Files Affected:
diff --git a/flang/lib/Lower/OpenACC.cpp b/flang/lib/Lower/OpenACC.cpp
index 98a3aced3f528..d10169e236471 100644
--- a/flang/lib/Lower/OpenACC.cpp
+++ b/flang/lib/Lower/OpenACC.cpp
@@ -1622,16 +1622,18 @@ static bool isSupportedReductionType(mlir::Type ty) {
return fir::isa_trivial(ty);
}
-static void
-genReductions(const Fortran::parser::AccObjectListWithReduction &objectList,
- Fortran::lower::AbstractConverter &converter,
- Fortran::semantics::SemanticsContext &semanticsContext,
- Fortran::lower::StatementContext &stmtCtx,
- llvm::SmallVectorImpl<mlir::Value> &reductionOperands,
- llvm::SmallVector<mlir::Attribute> &reductionRecipes,
- llvm::ArrayRef<mlir::Value> async,
- llvm::ArrayRef<mlir::Attribute> asyncDeviceTypes,
- llvm::ArrayRef<mlir::Attribute> asyncOnlyDeviceTypes) {
+static void genReductions(
+ const Fortran::parser::AccObjectListWithReduction &objectList,
+ Fortran::lower::AbstractConverter &converter,
+ Fortran::semantics::SemanticsContext &semanticsContext,
+ Fortran::lower::StatementContext &stmtCtx,
+ llvm::SmallVectorImpl<mlir::Value> &reductionOperands,
+ llvm::SmallVector<mlir::Attribute> &reductionRecipes,
+ llvm::ArrayRef<mlir::Value> async,
+ llvm::ArrayRef<mlir::Attribute> asyncDeviceTypes,
+ llvm::ArrayRef<mlir::Attribute> asyncOnlyDeviceTypes,
+ llvm::SmallVectorImpl<std::pair<mlir::Value, Fortran::semantics::SymbolRef>>
+ *symbolPairs = nullptr) {
fir::FirOpBuilder &builder = converter.getFirOpBuilder();
const auto &objects = std::get<Fortran::parser::AccObjectList>(objectList.t);
const auto &op = std::get<Fortran::parser::ReductionOperator>(objectList.t);
@@ -1644,6 +1646,8 @@ genReductions(const Fortran::parser::AccObjectListWithReduction &objectList,
Fortran::semantics::Symbol &symbol = getSymbolFromAccObject(accObject);
Fortran::semantics::MaybeExpr designator = Fortran::common::visit(
[&](auto &&s) { return ea.Analyze(s); }, accObject.u);
+ bool isWholeSymbol =
+ !designator || Fortran::evaluate::UnwrapWholeSymbolDataRef(*designator);
fir::factory::AddrAndBoundsInfo info =
Fortran::lower::gatherDataOperandAddrAndBounds<
mlir::acc::DataBoundsOp, mlir::acc::DataBoundsType>(
@@ -1680,6 +1684,11 @@ genReductions(const Fortran::parser::AccObjectListWithReduction &objectList,
reductionRecipes.push_back(mlir::SymbolRefAttr::get(
builder.getContext(), recipe.getSymName().str()));
reductionOperands.push_back(op.getAccVar());
+ // Track the symbol and its corresponding mlir::Value if requested so that
+ // accesses inside the compute/loop regions use the acc.reduction variable.
+ if (symbolPairs && isWholeSymbol)
+ symbolPairs->emplace_back(op.getAccVar(),
+ Fortran::semantics::SymbolRef(symbol));
}
}
@@ -2545,7 +2554,8 @@ static mlir::acc::LoopOp createLoopOp(
&clause.u)) {
genReductions(reductionClause->v, converter, semanticsContext, stmtCtx,
reductionOperands, reductionRecipes, /*async=*/{},
- /*asyncDeviceTypes=*/{}, /*asyncOnlyDeviceTypes=*/{});
+ /*asyncDeviceTypes=*/{}, /*asyncOnlyDeviceTypes=*/{},
+ &dataOperandSymbolPairs);
} else if (std::get_if<Fortran::parser::AccClause::Seq>(&clause.u)) {
for (auto crtDeviceTypeAttr : crtDeviceTypes)
seqDeviceTypes.push_back(crtDeviceTypeAttr);
@@ -2995,7 +3005,8 @@ static Op createComputeOp(
if (!combinedConstructs) {
genReductions(reductionClause->v, converter, semanticsContext, stmtCtx,
reductionOperands, reductionRecipes, async,
- asyncDeviceTypes, asyncOnlyDeviceTypes);
+ asyncDeviceTypes, asyncOnlyDeviceTypes,
+ &dataOperandSymbolPairs);
} else {
auto crtDataStart = dataClauseOperands.size();
genDataOperandOperations<mlir::acc::CopyinOp>(
diff --git a/flang/test/Lower/OpenACC/acc-reduction-remapping.f90 b/flang/test/Lower/OpenACC/acc-reduction-remapping.f90
new file mode 100644
index 0000000000000..6ee365f278678
--- /dev/null
+++ b/flang/test/Lower/OpenACC/acc-reduction-remapping.f90
@@ -0,0 +1,160 @@
+! Test remapping of variables appearing in OpenACC reduction clause
+! to the related acc dialect data operation result.
+
+! This test checks how the hlfir.declare is recreated and used inside
+! the acc compute region.
+
+! RUN: bbc -fopenacc -emit-hlfir %s -o - | FileCheck %s
+
+subroutine scalar_combined(x, y)
+ real :: y, x(100)
+ !$acc parallel copyin(x) reduction(+:y)
+ do i=1,100
+ y = y + x(i)
+ end do
+ !$acc end parallel
+end subroutine
+
+subroutine scalar_split(x, y)
+ real :: y, x(100)
+ !$acc parallel copyin(x) copyout(y)
+ !$acc loop reduction(+:y)
+ do i=1,100
+ y = y + x(i)
+ end do
+ !$acc end parallel
+end subroutine
+
+subroutine array_combined(x, y, n)
+ integer(8) :: n
+ real :: y(n), x(100, n)
+ !$acc parallel copyin(x) reduction(+:y)
+ do j=1,n
+ do i=1,100
+ y(j) = y(j) + x(i, j)
+ end do
+ end do
+ !$acc end parallel
+end subroutine
+
+subroutine array_split(x, y, n)
+ integer(8) :: n
+ real :: y(n), x(100, n)
+ !$acc parallel copyin(x) copyout(y)
+ !$acc loop reduction(+:y)
+ do j=1,n
+ do i=1,100
+ y(j) = y(j) + x(i, j)
+ end do
+ end do
+ !$acc end parallel
+end subroutine
+
+! CHECK-LABEL: func.func @_QPscalar_combined(
+! CHECK: %[[DUMMY_SCOPE_0:.*]] = fir.dummy_scope : !fir.dscope
+! CHECK: %[[DECLARE_Y:.*]]:2 = hlfir.declare %{{.*}} dummy_scope %[[DUMMY_SCOPE_0]] arg 2 {uniq_name = "_QFscalar_combinedEy"} : (!fir.ref<f32>, !fir.dscope) -> (!fir.ref<f32>, !fir.ref<f32>)
+! CHECK: %[[REDUCTION_Y:.*]] = acc.reduction varPtr(%[[DECLARE_Y]]#0 : !fir.ref<f32>) -> !fir.ref<f32> {name = "y"}
+! CHECK: acc.parallel {{.*}} reduction(@reduction_add_ref_f32 -> %[[REDUCTION_Y]] : !fir.ref<f32>) {
+! CHECK: %[[DUMMY_SCOPE_1:.*]] = fir.dummy_scope : !fir.dscope
+! CHECK: %[[DECLARE_RED_PAR:.*]]:2 = hlfir.declare %[[REDUCTION_Y]] dummy_scope %[[DUMMY_SCOPE_1]] arg 2 {uniq_name = "_QFscalar_combinedEy"} : (!fir.ref<f32>, !fir.dscope) -> (!fir.ref<f32>, !fir.ref<f32>)
+! CHECK: %[[PRIVATE_I:.*]] = acc.private varPtr({{.*}}) -> !fir.ref<i32> {implicit = true, name = "i"}
+! CHECK: acc.loop private(@privatization_ref_i32 -> %[[PRIVATE_I]] : !fir.ref<i32>) {{.*}} {
+! CHECK: %[[DECLARE_I:.*]]:2 = hlfir.declare %[[PRIVATE_I]] {uniq_name = "_QFscalar_combinedEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK: fir.store %[[VAL_0:.*]] to %[[DECLARE_I]]#0 : !fir.ref<i32>
+! CHECK: %[[LOAD_RED:.*]] = fir.load %[[DECLARE_RED_PAR]]#0 : !fir.ref<f32>
+! CHECK: {{.*}} = hlfir.designate {{.*}} : (!fir.ref<!fir.array<100xf32>>, i64) -> !fir.ref<f32>
+! CHECK: {{.*}} = fir.load {{.*}} : !fir.ref<f32>
+! CHECK: %[[ADDF:.*]] = arith.addf %[[LOAD_RED]], {{.*}} {{.*}}: f32
+! CHECK: hlfir.assign %[[ADDF]] to %[[DECLARE_RED_PAR]]#0 : f32, !fir.ref<f32>
+! CHECK: acc.yield
+! CHECK: }
+! CHECK: acc.yield
+! CHECK: }
+! CHECK: return
+! CHECK: }
+!
+!
+! CHECK-LABEL: func.func @_QPscalar_split(
+! CHECK: %[[DUMMY_SCOPE_0:.*]] = fir.dummy_scope : !fir.dscope
+! CHECK: %[[DECLARE_Y:.*]]:2 = hlfir.declare %{{.*}} dummy_scope %[[DUMMY_SCOPE_0]] arg 2 {uniq_name = "_QFscalar_splitEy"} : (!fir.ref<f32>, !fir.dscope) -> (!fir.ref<f32>, !fir.ref<f32>)
+! CHECK: %[[CREATE_Y:.*]] = acc.create varPtr(%[[DECLARE_Y]]#0 : !fir.ref<f32>) -> !fir.ref<f32> {dataClause = #acc<data_clause acc_copyout>, name = "y"}
+! CHECK: acc.parallel dataOperands({{.*}}%[[CREATE_Y]] : {{.*}}) {
+! CHECK: %[[DUMMY_SCOPE_1:.*]] = fir.dummy_scope : !fir.dscope
+! CHECK: %[[DECLARE_Y_PAR:.*]]:2 = hlfir.declare %[[CREATE_Y]] dummy_scope %[[DUMMY_SCOPE_1]] arg 2 {uniq_name = "_QFscalar_splitEy"} : (!fir.ref<f32>, !fir.dscope) -> (!fir.ref<f32>, !fir.ref<f32>)
+! CHECK: %[[REDUCTION_Y:.*]] = acc.reduction varPtr(%[[DECLARE_Y_PAR]]#0 : !fir.ref<f32>) -> !fir.ref<f32> {name = "y"}
+! CHECK: %[[PRIVATE_I:.*]] = acc.private varPtr({{.*}}) -> !fir.ref<i32> {implicit = true, name = "i"}
+! CHECK: acc.loop private(@privatization_ref_i32 -> %[[PRIVATE_I]] : !fir.ref<i32>) reduction(@reduction_add_ref_f32 -> %[[REDUCTION_Y]] : !fir.ref<f32>) {{.*}} {
+! CHECK: %[[DUMMY_SCOPE_2:.*]] = fir.dummy_scope : !fir.dscope
+! CHECK: %[[DECLARE_RED:.*]]:2 = hlfir.declare %[[REDUCTION_Y]] dummy_scope %[[DUMMY_SCOPE_2]] arg 2 {uniq_name = "_QFscalar_splitEy"} : (!fir.ref<f32>, !fir.dscope) -> (!fir.ref<f32>, !fir.ref<f32>)
+! CHECK: %[[LOAD_RED:.*]] = fir.load %[[DECLARE_RED]]#0 : !fir.ref<f32>
+! CHECK: %[[ADDF:.*]] = arith.addf %[[LOAD_RED]], {{.*}} {{.*}}: f32
+! CHECK: hlfir.assign %[[ADDF]] to %[[DECLARE_RED]]#0 : f32, !fir.ref<f32>
+! CHECK: acc.yield
+! CHECK: }
+! CHECK: acc.yield
+! CHECK: }
+! CHECK: return
+! CHECK: }
+
+
+! CHECK-LABEL: func.func @_QParray_combined(
+! CHECK: %[[DUMMY_SCOPE_0:.*]] = fir.dummy_scope : !fir.dscope
+! CHECK: %[[DECLARE_N:.*]]:2 = hlfir.declare %{{.*}} dummy_scope %[[DUMMY_SCOPE_0]] arg 3 {uniq_name = "_QFarray_combinedEn"} : (!fir.ref<i64>, !fir.dscope) -> (!fir.ref<i64>, !fir.ref<i64>)
+! CHECK: %[[DECLARE_Y:.*]]:2 = hlfir.declare %{{.*}}({{.*}}) dummy_scope %[[DUMMY_SCOPE_0]] arg 2 {uniq_name = "_QFarray_combinedEy"} : (!fir.ref<!fir.array<?xf32>>, {{.*}}, !fir.dscope) -> (!fir.box<!fir.array<?xf32>>, !fir.ref<!fir.array<?xf32>>)
+! CHECK: %[[REDUCTION_Y:.*]] = acc.reduction var(%[[DECLARE_Y]]#0 : !fir.box<!fir.array<?xf32>>) -> !fir.box<!fir.array<?xf32>> {name = "y"}
+! CHECK: acc.parallel dataOperands({{.*}}) reduction(@reduction_add_box_Uxf32 -> %[[REDUCTION_Y]] : !fir.box<!fir.array<?xf32>>) {
+! CHECK: %[[DUMMY_SCOPE_1:.*]] = fir.dummy_scope : !fir.dscope
+! CHECK: %[[BOX_ADDR_RED:.*]] = fir.box_addr %[[REDUCTION_Y]] : (!fir.box<!fir.array<?xf32>>) -> !fir.ref<!fir.array<?xf32>>
+! CHECK: %[[DECLARE_Y_PAR:.*]]:2 = hlfir.declare %[[BOX_ADDR_RED]]({{.*}}) dummy_scope %[[DUMMY_SCOPE_1]] arg 2 {uniq_name = "_QFarray_combinedEy"} : (!fir.ref<!fir.array<?xf32>>, {{.*}}, !fir.dscope) -> (!fir.box<!fir.array<?xf32>>, !fir.ref<!fir.array<?xf32>>)
+! CHECK: %[[PRIVATE_J:.*]] = acc.private varPtr({{.*}}) -> !fir.ref<i32> {implicit = true, name = "j"}
+! CHECK: acc.loop private(@privatization_ref_i32 -> %[[PRIVATE_J]] : !fir.ref<i32>) {{.*}} {
+! CHECK: %[[PRIVATE_I:.*]] = acc.private varPtr({{.*}}) -> !fir.ref<i32> {implicit = true, name = "i"}
+! CHECK: acc.loop private(@privatization_ref_i32 -> %[[PRIVATE_I]] : !fir.ref<i32>) {{.*}} {
+! CHECK: %[[DESIGNATE_RED:.*]] = hlfir.designate %[[DECLARE_Y_PAR]]#0 ({{.*}}) : (!fir.box<!fir.array<?xf32>>, i64) -> !fir.ref<f32>
+! CHECK: %[[LOAD_OLD:.*]] = fir.load %[[DESIGNATE_RED]] : !fir.ref<f32>
+! CHECK: {{.*}} = hlfir.designate {{.*}} : (!fir.box<!fir.array<100x?xf32>>, i64, i64) -> !fir.ref<f32>
+! CHECK: {{.*}} = fir.load {{.*}} : !fir.ref<f32>
+! CHECK: %[[ADDF:.*]] = arith.addf %[[LOAD_OLD]], {{.*}} {{.*}}: f32
+! CHECK: %[[DESIGNATE_RED2:.*]] = hlfir.designate %[[DECLARE_Y_PAR]]#0 ({{.*}}) : (!fir.box<!fir.array<?xf32>>, i64) -> !fir.ref<f32>
+! CHECK: hlfir.assign %[[ADDF]] to %[[DESIGNATE_RED2]] : f32, !fir.ref<f32>
+! CHECK: acc.yield
+! CHECK: }
+! CHECK: acc.yield
+! CHECK: }
+! CHECK: acc.yield
+! CHECK: }
+! CHECK: return
+! CHECK: }
+
+
+! CHECK-LABEL: func.func @_QParray_split(
+! CHECK: %[[DUMMY_SCOPE_0:.*]] = fir.dummy_scope : !fir.dscope
+! CHECK: %[[DECLARE_Y:.*]]:2 = hlfir.declare %{{.*}}({{.*}}) dummy_scope %[[DUMMY_SCOPE_0]] arg 2 {uniq_name = "_QFarray_splitEy"} : (!fir.ref<!fir.array<?xf32>>, {{.*}}, !fir.dscope) -> (!fir.box<!fir.array<?xf32>>, !fir.ref<!fir.array<?xf32>>)
+! CHECK: %[[CREATE_Y:.*]] = acc.create var(%[[DECLARE_Y]]#0 : !fir.box<!fir.array<?xf32>>) -> !fir.box<!fir.array<?xf32>> {dataClause = #acc<data_clause acc_copyout>, name = "y"}
+! CHECK: acc.parallel dataOperands({{.*}}%[[CREATE_Y]] : {{.*}}) {
+! CHECK: %[[DUMMY_SCOPE_1:.*]] = fir.dummy_scope : !fir.dscope
+! CHECK: %[[BOX_ADDR_Y:.*]] = fir.box_addr %[[CREATE_Y]] : (!fir.box<!fir.array<?xf32>>) -> !fir.ref<!fir.array<?xf32>>
+! CHECK: %[[DECLARE_Y_PAR:.*]]:2 = hlfir.declare %[[BOX_ADDR_Y]]({{.*}}) dummy_scope %[[DUMMY_SCOPE_1]] arg 2 {uniq_name = "_QFarray_splitEy"} : (!fir.ref<!fir.array<?xf32>>, {{.*}}, !fir.dscope) -> (!fir.box<!fir.array<?xf32>>, !fir.ref<!fir.array<?xf32>>)
+! CHECK: %[[REDUCTION_Y:.*]] = acc.reduction var(%[[DECLARE_Y_PAR]]#0 : !fir.box<!fir.array<?xf32>>) -> !fir.box<!fir.array<?xf32>> {name = "y"}
+! CHECK: %[[PRIVATE_J:.*]] = acc.private varPtr({{.*}}) -> !fir.ref<i32> {implicit = true, name = "j"}
+! CHECK: acc.loop private(@privatization_ref_i32 -> %[[PRIVATE_J]] : !fir.ref<i32>) reduction(@reduction_add_box_Uxf32 -> %[[REDUCTION_Y]] : !fir.box<!fir.array<?xf32>>) {{.*}} {
+! CHECK: %[[BOX_ADDR_RED:.*]] = fir.box_addr %[[REDUCTION_Y]] : (!fir.box<!fir.array<?xf32>>) -> !fir.ref<!fir.array<?xf32>>
+! CHECK: %[[DUMMY_SCOPE_2:.*]] = fir.dummy_scope : !fir.dscope
+! CHECK: %[[DECLARE_Y_LOOP_PAR:.*]]:2 = hlfir.declare %[[BOX_ADDR_RED]]({{.*}}) dummy_scope %[[DUMMY_SCOPE_2]] arg 2 {uniq_name = "_QFarray_splitEy"} : (!fir.ref<!fir.array<?xf32>>, {{.*}}, !fir.dscope) -> (!fir.box<!fir.array<?xf32>>, !fir.ref<!fir.array<?xf32>>)
+! CHECK: %[[PRIVATE_I:.*]] = acc.private varPtr({{.*}}) -> !fir.ref<i32> {implicit = true, name = "i"}
+! CHECK: acc.loop private(@privatization_ref_i32 -> %[[PRIVATE_I]] : !fir.ref<i32>) {{.*}} {
+! CHECK: %[[DESIGNATE_RED:.*]] = hlfir.designate %[[DECLARE_Y_LOOP_PAR]]#0 ({{.*}}) : (!fir.box<!fir.array<?xf32>>, i64) -> !fir.ref<f32>
+! CHECK: %[[LOAD_OLD:.*]] = fir.load %[[DESIGNATE_RED]] : !fir.ref<f32>
+! CHECK: {{.*}} = hlfir.designate {{.*}} : (!fir.box<!fir.array<100x?xf32>>, i64, i64) -> !fir.ref<f32>
+! CHECK: {{.*}} = fir.load {{.*}} : !fir.ref<f32>
+! CHECK: %[[ADDF:.*]] = arith.addf %[[LOAD_OLD]], {{.*}} {{.*}}: f32
+! CHECK: %[[DESIGNATE_RED2:.*]] = hlfir.designate %[[DECLARE_Y_LOOP_PAR]]#0 ({{.*}}) : (!fir.box<!fir.array<?xf32>>, i64) -> !fir.ref<f32>
+! CHECK: hlfir.assign %[[ADDF]] to %[[DESIGNATE_RED2]] : f32, !fir.ref<f32>
+! CHECK: acc.yield
+! CHECK: }
+! CHECK: acc.yield
+! CHECK: }
+! CHECK: acc.yield
+! CHECK: }
+! CHECK: return
+! CHECK: }
|
🐧 Linux x64 Test Results
|
razvanlupusoru
left a comment
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Thank you!
This patch is a follow-up to llvm#162306 for the reduction clause. Inside the compute region that carries the reduction clause, a new hlfir.declare is generated for each symbol appearing in the reduction clause. The input of this hlfir.declare is the acc.reduction result. The related semantics::Symbol is remapped to the hlfir.declare result so that any reference to the symbol inside the compute region will use this SSA value as the starting point instead of the SSA value for the host address.
This patch is a follow-up to llvm#162306 for the reduction clause. Inside the compute region that carries the reduction clause, a new hlfir.declare is generated for each symbol appearing in the reduction clause. The input of this hlfir.declare is the acc.reduction result. The related semantics::Symbol is remapped to the hlfir.declare result so that any reference to the symbol inside the compute region will use this SSA value as the starting point instead of the SSA value for the host address.
This patch is a follow-up of #162306 for the reduction clause.
Inside the compute region that carries the reduction clause, a new hlfir.declare is generated for each symbol appearing in the reduction clause. The input of this hlfir.declare is the acc.reduction result. The related semantics::Symbol is remapped to the hlfir.declare result so that any reference to the symbol inside the compute region will use this SSA value as the starting point instead of the SSA value for the host address.