Skip to content

Commit 077a280

Browse files
authored
[flang][acc] remap symbol appearing in reduction clause (#168876)
This patch is a follow-up of #162306 for the reduction clause. Inside the compute region that carries the reduction clause, a new hlfir.declare is generated for each symbol appearing in the reduction clause. The input of this hlfir.declare is the acc.reduction result. The related semantics::Symbol is remapped to the hlfir.declare result so that any reference to the symbol inside the compute region uses this SSA value as its starting point instead of the SSA value for the host address.
1 parent b37b307 commit 077a280

File tree

2 files changed

+183
-12
lines changed

2 files changed

+183
-12
lines changed

flang/lib/Lower/OpenACC.cpp

Lines changed: 23 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1622,16 +1622,18 @@ static bool isSupportedReductionType(mlir::Type ty) {
16221622
return fir::isa_trivial(ty);
16231623
}
16241624

1625-
static void
1626-
genReductions(const Fortran::parser::AccObjectListWithReduction &objectList,
1627-
Fortran::lower::AbstractConverter &converter,
1628-
Fortran::semantics::SemanticsContext &semanticsContext,
1629-
Fortran::lower::StatementContext &stmtCtx,
1630-
llvm::SmallVectorImpl<mlir::Value> &reductionOperands,
1631-
llvm::SmallVector<mlir::Attribute> &reductionRecipes,
1632-
llvm::ArrayRef<mlir::Value> async,
1633-
llvm::ArrayRef<mlir::Attribute> asyncDeviceTypes,
1634-
llvm::ArrayRef<mlir::Attribute> asyncOnlyDeviceTypes) {
1625+
static void genReductions(
1626+
const Fortran::parser::AccObjectListWithReduction &objectList,
1627+
Fortran::lower::AbstractConverter &converter,
1628+
Fortran::semantics::SemanticsContext &semanticsContext,
1629+
Fortran::lower::StatementContext &stmtCtx,
1630+
llvm::SmallVectorImpl<mlir::Value> &reductionOperands,
1631+
llvm::SmallVector<mlir::Attribute> &reductionRecipes,
1632+
llvm::ArrayRef<mlir::Value> async,
1633+
llvm::ArrayRef<mlir::Attribute> asyncDeviceTypes,
1634+
llvm::ArrayRef<mlir::Attribute> asyncOnlyDeviceTypes,
1635+
llvm::SmallVectorImpl<std::pair<mlir::Value, Fortran::semantics::SymbolRef>>
1636+
*symbolPairs = nullptr) {
16351637
fir::FirOpBuilder &builder = converter.getFirOpBuilder();
16361638
const auto &objects = std::get<Fortran::parser::AccObjectList>(objectList.t);
16371639
const auto &op = std::get<Fortran::parser::ReductionOperator>(objectList.t);
@@ -1644,6 +1646,8 @@ genReductions(const Fortran::parser::AccObjectListWithReduction &objectList,
16441646
Fortran::semantics::Symbol &symbol = getSymbolFromAccObject(accObject);
16451647
Fortran::semantics::MaybeExpr designator = Fortran::common::visit(
16461648
[&](auto &&s) { return ea.Analyze(s); }, accObject.u);
1649+
bool isWholeSymbol =
1650+
!designator || Fortran::evaluate::UnwrapWholeSymbolDataRef(*designator);
16471651
fir::factory::AddrAndBoundsInfo info =
16481652
Fortran::lower::gatherDataOperandAddrAndBounds<
16491653
mlir::acc::DataBoundsOp, mlir::acc::DataBoundsType>(
@@ -1680,6 +1684,11 @@ genReductions(const Fortran::parser::AccObjectListWithReduction &objectList,
16801684
reductionRecipes.push_back(mlir::SymbolRefAttr::get(
16811685
builder.getContext(), recipe.getSymName().str()));
16821686
reductionOperands.push_back(op.getAccVar());
1687+
// Track the symbol and its corresponding mlir::Value if requested so that
1688+
// accesses inside the compute/loop regions use the acc.reduction variable.
1689+
if (symbolPairs && isWholeSymbol)
1690+
symbolPairs->emplace_back(op.getAccVar(),
1691+
Fortran::semantics::SymbolRef(symbol));
16831692
}
16841693
}
16851694

@@ -2545,7 +2554,8 @@ static mlir::acc::LoopOp createLoopOp(
25452554
&clause.u)) {
25462555
genReductions(reductionClause->v, converter, semanticsContext, stmtCtx,
25472556
reductionOperands, reductionRecipes, /*async=*/{},
2548-
/*asyncDeviceTypes=*/{}, /*asyncOnlyDeviceTypes=*/{});
2557+
/*asyncDeviceTypes=*/{}, /*asyncOnlyDeviceTypes=*/{},
2558+
&dataOperandSymbolPairs);
25492559
} else if (std::get_if<Fortran::parser::AccClause::Seq>(&clause.u)) {
25502560
for (auto crtDeviceTypeAttr : crtDeviceTypes)
25512561
seqDeviceTypes.push_back(crtDeviceTypeAttr);
@@ -2995,7 +3005,8 @@ static Op createComputeOp(
29953005
if (!combinedConstructs) {
29963006
genReductions(reductionClause->v, converter, semanticsContext, stmtCtx,
29973007
reductionOperands, reductionRecipes, async,
2998-
asyncDeviceTypes, asyncOnlyDeviceTypes);
3008+
asyncDeviceTypes, asyncOnlyDeviceTypes,
3009+
&dataOperandSymbolPairs);
29993010
} else {
30003011
auto crtDataStart = dataClauseOperands.size();
30013012
genDataOperandOperations<mlir::acc::CopyinOp>(
Lines changed: 160 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,160 @@
1+
! Test remapping of variables appearing in OpenACC reduction clause
2+
! to the related acc dialect data operation result.
3+
4+
! This test checks how the hlfir.declare is recreated and used inside
5+
! the acc compute region.
6+
7+
! RUN: bbc -fopenacc -emit-hlfir %s -o - | FileCheck %s
8+
9+
subroutine scalar_combined(x, y)
10+
real :: y, x(100)
11+
!$acc parallel copyin(x) reduction(+:y)
12+
do i=1,100
13+
y = y + x(i)
14+
end do
15+
!$acc end parallel
16+
end subroutine
17+
18+
subroutine scalar_split(x, y)
19+
real :: y, x(100)
20+
!$acc parallel copyin(x) copyout(y)
21+
!$acc loop reduction(+:y)
22+
do i=1,100
23+
y = y + x(i)
24+
end do
25+
!$acc end parallel
26+
end subroutine
27+
28+
subroutine array_combined(x, y, n)
29+
integer(8) :: n
30+
real :: y(n), x(100, n)
31+
!$acc parallel copyin(x) reduction(+:y)
32+
do j=1,n
33+
do i=1,100
34+
y(j) = y(j) + x(i, j)
35+
end do
36+
end do
37+
!$acc end parallel
38+
end subroutine
39+
40+
subroutine array_split(x, y, n)
41+
integer(8) :: n
42+
real :: y(n), x(100, n)
43+
!$acc parallel copyin(x) copyout(y)
44+
!$acc loop reduction(+:y)
45+
do j=1,n
46+
do i=1,100
47+
y(j) = y(j) + x(i, j)
48+
end do
49+
end do
50+
!$acc end parallel
51+
end subroutine
52+
53+
! CHECK-LABEL: func.func @_QPscalar_combined(
54+
! CHECK: %[[DUMMY_SCOPE_0:.*]] = fir.dummy_scope : !fir.dscope
55+
! CHECK: %[[DECLARE_Y:.*]]:2 = hlfir.declare %{{.*}} dummy_scope %[[DUMMY_SCOPE_0]] arg 2 {uniq_name = "_QFscalar_combinedEy"} : (!fir.ref<f32>, !fir.dscope) -> (!fir.ref<f32>, !fir.ref<f32>)
56+
! CHECK: %[[REDUCTION_Y:.*]] = acc.reduction varPtr(%[[DECLARE_Y]]#0 : !fir.ref<f32>) -> !fir.ref<f32> {name = "y"}
57+
! CHECK: acc.parallel {{.*}} reduction(@reduction_add_ref_f32 -> %[[REDUCTION_Y]] : !fir.ref<f32>) {
58+
! CHECK: %[[DUMMY_SCOPE_1:.*]] = fir.dummy_scope : !fir.dscope
59+
! CHECK: %[[DECLARE_RED_PAR:.*]]:2 = hlfir.declare %[[REDUCTION_Y]] dummy_scope %[[DUMMY_SCOPE_1]] arg 2 {uniq_name = "_QFscalar_combinedEy"} : (!fir.ref<f32>, !fir.dscope) -> (!fir.ref<f32>, !fir.ref<f32>)
60+
! CHECK: %[[PRIVATE_I:.*]] = acc.private varPtr({{.*}}) -> !fir.ref<i32> {implicit = true, name = "i"}
61+
! CHECK: acc.loop private(@privatization_ref_i32 -> %[[PRIVATE_I]] : !fir.ref<i32>) {{.*}} {
62+
! CHECK: %[[DECLARE_I:.*]]:2 = hlfir.declare %[[PRIVATE_I]] {uniq_name = "_QFscalar_combinedEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
63+
! CHECK: fir.store %[[VAL_0:.*]] to %[[DECLARE_I]]#0 : !fir.ref<i32>
64+
! CHECK: %[[LOAD_RED:.*]] = fir.load %[[DECLARE_RED_PAR]]#0 : !fir.ref<f32>
65+
! CHECK: {{.*}} = hlfir.designate {{.*}} : (!fir.ref<!fir.array<100xf32>>, i64) -> !fir.ref<f32>
66+
! CHECK: {{.*}} = fir.load {{.*}} : !fir.ref<f32>
67+
! CHECK: %[[ADDF:.*]] = arith.addf %[[LOAD_RED]], {{.*}} {{.*}}: f32
68+
! CHECK: hlfir.assign %[[ADDF]] to %[[DECLARE_RED_PAR]]#0 : f32, !fir.ref<f32>
69+
! CHECK: acc.yield
70+
! CHECK: }
71+
! CHECK: acc.yield
72+
! CHECK: }
73+
! CHECK: return
74+
! CHECK: }
75+
!
76+
!
77+
! CHECK-LABEL: func.func @_QPscalar_split(
78+
! CHECK: %[[DUMMY_SCOPE_0:.*]] = fir.dummy_scope : !fir.dscope
79+
! CHECK: %[[DECLARE_Y:.*]]:2 = hlfir.declare %{{.*}} dummy_scope %[[DUMMY_SCOPE_0]] arg 2 {uniq_name = "_QFscalar_splitEy"} : (!fir.ref<f32>, !fir.dscope) -> (!fir.ref<f32>, !fir.ref<f32>)
80+
! CHECK: %[[CREATE_Y:.*]] = acc.create varPtr(%[[DECLARE_Y]]#0 : !fir.ref<f32>) -> !fir.ref<f32> {dataClause = #acc<data_clause acc_copyout>, name = "y"}
81+
! CHECK: acc.parallel dataOperands({{.*}}%[[CREATE_Y]] : {{.*}}) {
82+
! CHECK: %[[DUMMY_SCOPE_1:.*]] = fir.dummy_scope : !fir.dscope
83+
! CHECK: %[[DECLARE_Y_PAR:.*]]:2 = hlfir.declare %[[CREATE_Y]] dummy_scope %[[DUMMY_SCOPE_1]] arg 2 {uniq_name = "_QFscalar_splitEy"} : (!fir.ref<f32>, !fir.dscope) -> (!fir.ref<f32>, !fir.ref<f32>)
84+
! CHECK: %[[REDUCTION_Y:.*]] = acc.reduction varPtr(%[[DECLARE_Y_PAR]]#0 : !fir.ref<f32>) -> !fir.ref<f32> {name = "y"}
85+
! CHECK: %[[PRIVATE_I:.*]] = acc.private varPtr({{.*}}) -> !fir.ref<i32> {implicit = true, name = "i"}
86+
! CHECK: acc.loop private(@privatization_ref_i32 -> %[[PRIVATE_I]] : !fir.ref<i32>) reduction(@reduction_add_ref_f32 -> %[[REDUCTION_Y]] : !fir.ref<f32>) {{.*}} {
87+
! CHECK: %[[DUMMY_SCOPE_2:.*]] = fir.dummy_scope : !fir.dscope
88+
! CHECK: %[[DECLARE_RED:.*]]:2 = hlfir.declare %[[REDUCTION_Y]] dummy_scope %[[DUMMY_SCOPE_2]] arg 2 {uniq_name = "_QFscalar_splitEy"} : (!fir.ref<f32>, !fir.dscope) -> (!fir.ref<f32>, !fir.ref<f32>)
89+
! CHECK: %[[LOAD_RED:.*]] = fir.load %[[DECLARE_RED]]#0 : !fir.ref<f32>
90+
! CHECK: %[[ADDF:.*]] = arith.addf %[[LOAD_RED]], {{.*}} {{.*}}: f32
91+
! CHECK: hlfir.assign %[[ADDF]] to %[[DECLARE_RED]]#0 : f32, !fir.ref<f32>
92+
! CHECK: acc.yield
93+
! CHECK: }
94+
! CHECK: acc.yield
95+
! CHECK: }
96+
! CHECK: return
97+
! CHECK: }
98+
99+
100+
! CHECK-LABEL: func.func @_QParray_combined(
101+
! CHECK: %[[DUMMY_SCOPE_0:.*]] = fir.dummy_scope : !fir.dscope
102+
! CHECK: %[[DECLARE_N:.*]]:2 = hlfir.declare %{{.*}} dummy_scope %[[DUMMY_SCOPE_0]] arg 3 {uniq_name = "_QFarray_combinedEn"} : (!fir.ref<i64>, !fir.dscope) -> (!fir.ref<i64>, !fir.ref<i64>)
103+
! CHECK: %[[DECLARE_Y:.*]]:2 = hlfir.declare %{{.*}}({{.*}}) dummy_scope %[[DUMMY_SCOPE_0]] arg 2 {uniq_name = "_QFarray_combinedEy"} : (!fir.ref<!fir.array<?xf32>>, {{.*}}, !fir.dscope) -> (!fir.box<!fir.array<?xf32>>, !fir.ref<!fir.array<?xf32>>)
104+
! CHECK: %[[REDUCTION_Y:.*]] = acc.reduction var(%[[DECLARE_Y]]#0 : !fir.box<!fir.array<?xf32>>) -> !fir.box<!fir.array<?xf32>> {name = "y"}
105+
! CHECK: acc.parallel dataOperands({{.*}}) reduction(@reduction_add_box_Uxf32 -> %[[REDUCTION_Y]] : !fir.box<!fir.array<?xf32>>) {
106+
! CHECK: %[[DUMMY_SCOPE_1:.*]] = fir.dummy_scope : !fir.dscope
107+
! CHECK: %[[BOX_ADDR_RED:.*]] = fir.box_addr %[[REDUCTION_Y]] : (!fir.box<!fir.array<?xf32>>) -> !fir.ref<!fir.array<?xf32>>
108+
! CHECK: %[[DECLARE_Y_PAR:.*]]:2 = hlfir.declare %[[BOX_ADDR_RED]]({{.*}}) dummy_scope %[[DUMMY_SCOPE_1]] arg 2 {uniq_name = "_QFarray_combinedEy"} : (!fir.ref<!fir.array<?xf32>>, {{.*}}, !fir.dscope) -> (!fir.box<!fir.array<?xf32>>, !fir.ref<!fir.array<?xf32>>)
109+
! CHECK: %[[PRIVATE_J:.*]] = acc.private varPtr({{.*}}) -> !fir.ref<i32> {implicit = true, name = "j"}
110+
! CHECK: acc.loop private(@privatization_ref_i32 -> %[[PRIVATE_J]] : !fir.ref<i32>) {{.*}} {
111+
! CHECK: %[[PRIVATE_I:.*]] = acc.private varPtr({{.*}}) -> !fir.ref<i32> {implicit = true, name = "i"}
112+
! CHECK: acc.loop private(@privatization_ref_i32 -> %[[PRIVATE_I]] : !fir.ref<i32>) {{.*}} {
113+
! CHECK: %[[DESIGNATE_RED:.*]] = hlfir.designate %[[DECLARE_Y_PAR]]#0 ({{.*}}) : (!fir.box<!fir.array<?xf32>>, i64) -> !fir.ref<f32>
114+
! CHECK: %[[LOAD_OLD:.*]] = fir.load %[[DESIGNATE_RED]] : !fir.ref<f32>
115+
! CHECK: {{.*}} = hlfir.designate {{.*}} : (!fir.box<!fir.array<100x?xf32>>, i64, i64) -> !fir.ref<f32>
116+
! CHECK: {{.*}} = fir.load {{.*}} : !fir.ref<f32>
117+
! CHECK: %[[ADDF:.*]] = arith.addf %[[LOAD_OLD]], {{.*}} {{.*}}: f32
118+
! CHECK: %[[DESIGNATE_RED2:.*]] = hlfir.designate %[[DECLARE_Y_PAR]]#0 ({{.*}}) : (!fir.box<!fir.array<?xf32>>, i64) -> !fir.ref<f32>
119+
! CHECK: hlfir.assign %[[ADDF]] to %[[DESIGNATE_RED2]] : f32, !fir.ref<f32>
120+
! CHECK: acc.yield
121+
! CHECK: }
122+
! CHECK: acc.yield
123+
! CHECK: }
124+
! CHECK: acc.yield
125+
! CHECK: }
126+
! CHECK: return
127+
! CHECK: }
128+
129+
130+
! CHECK-LABEL: func.func @_QParray_split(
131+
! CHECK: %[[DUMMY_SCOPE_0:.*]] = fir.dummy_scope : !fir.dscope
132+
! CHECK: %[[DECLARE_Y:.*]]:2 = hlfir.declare %{{.*}}({{.*}}) dummy_scope %[[DUMMY_SCOPE_0]] arg 2 {uniq_name = "_QFarray_splitEy"} : (!fir.ref<!fir.array<?xf32>>, {{.*}}, !fir.dscope) -> (!fir.box<!fir.array<?xf32>>, !fir.ref<!fir.array<?xf32>>)
133+
! CHECK: %[[CREATE_Y:.*]] = acc.create var(%[[DECLARE_Y]]#0 : !fir.box<!fir.array<?xf32>>) -> !fir.box<!fir.array<?xf32>> {dataClause = #acc<data_clause acc_copyout>, name = "y"}
134+
! CHECK: acc.parallel dataOperands({{.*}}%[[CREATE_Y]] : {{.*}}) {
135+
! CHECK: %[[DUMMY_SCOPE_1:.*]] = fir.dummy_scope : !fir.dscope
136+
! CHECK: %[[BOX_ADDR_Y:.*]] = fir.box_addr %[[CREATE_Y]] : (!fir.box<!fir.array<?xf32>>) -> !fir.ref<!fir.array<?xf32>>
137+
! CHECK: %[[DECLARE_Y_PAR:.*]]:2 = hlfir.declare %[[BOX_ADDR_Y]]({{.*}}) dummy_scope %[[DUMMY_SCOPE_1]] arg 2 {uniq_name = "_QFarray_splitEy"} : (!fir.ref<!fir.array<?xf32>>, {{.*}}, !fir.dscope) -> (!fir.box<!fir.array<?xf32>>, !fir.ref<!fir.array<?xf32>>)
138+
! CHECK: %[[REDUCTION_Y:.*]] = acc.reduction var(%[[DECLARE_Y_PAR]]#0 : !fir.box<!fir.array<?xf32>>) -> !fir.box<!fir.array<?xf32>> {name = "y"}
139+
! CHECK: %[[PRIVATE_J:.*]] = acc.private varPtr({{.*}}) -> !fir.ref<i32> {implicit = true, name = "j"}
140+
! CHECK: acc.loop private(@privatization_ref_i32 -> %[[PRIVATE_J]] : !fir.ref<i32>) reduction(@reduction_add_box_Uxf32 -> %[[REDUCTION_Y]] : !fir.box<!fir.array<?xf32>>) {{.*}} {
141+
! CHECK: %[[BOX_ADDR_RED:.*]] = fir.box_addr %[[REDUCTION_Y]] : (!fir.box<!fir.array<?xf32>>) -> !fir.ref<!fir.array<?xf32>>
142+
! CHECK: %[[DUMMY_SCOPE_2:.*]] = fir.dummy_scope : !fir.dscope
143+
! CHECK: %[[DECLARE_Y_LOOP_PAR:.*]]:2 = hlfir.declare %[[BOX_ADDR_RED]]({{.*}}) dummy_scope %[[DUMMY_SCOPE_2]] arg 2 {uniq_name = "_QFarray_splitEy"} : (!fir.ref<!fir.array<?xf32>>, {{.*}}, !fir.dscope) -> (!fir.box<!fir.array<?xf32>>, !fir.ref<!fir.array<?xf32>>)
144+
! CHECK: %[[PRIVATE_I:.*]] = acc.private varPtr({{.*}}) -> !fir.ref<i32> {implicit = true, name = "i"}
145+
! CHECK: acc.loop private(@privatization_ref_i32 -> %[[PRIVATE_I]] : !fir.ref<i32>) {{.*}} {
146+
! CHECK: %[[DESIGNATE_RED:.*]] = hlfir.designate %[[DECLARE_Y_LOOP_PAR]]#0 ({{.*}}) : (!fir.box<!fir.array<?xf32>>, i64) -> !fir.ref<f32>
147+
! CHECK: %[[LOAD_OLD:.*]] = fir.load %[[DESIGNATE_RED]] : !fir.ref<f32>
148+
! CHECK: {{.*}} = hlfir.designate {{.*}} : (!fir.box<!fir.array<100x?xf32>>, i64, i64) -> !fir.ref<f32>
149+
! CHECK: {{.*}} = fir.load {{.*}} : !fir.ref<f32>
150+
! CHECK: %[[ADDF:.*]] = arith.addf %[[LOAD_OLD]], {{.*}} {{.*}}: f32
151+
! CHECK: %[[DESIGNATE_RED2:.*]] = hlfir.designate %[[DECLARE_Y_LOOP_PAR]]#0 ({{.*}}) : (!fir.box<!fir.array<?xf32>>, i64) -> !fir.ref<f32>
152+
! CHECK: hlfir.assign %[[ADDF]] to %[[DESIGNATE_RED2]] : f32, !fir.ref<f32>
153+
! CHECK: acc.yield
154+
! CHECK: }
155+
! CHECK: acc.yield
156+
! CHECK: }
157+
! CHECK: acc.yield
158+
! CHECK: }
159+
! CHECK: return
160+
! CHECK: }

0 commit comments

Comments
 (0)