Skip to content

Commit 5c3fa17

Browse files
committed
[flang] Postpone hlfir.end_associate generation for calls.
If we generate hlfir.end_associate at the end of the statement, we get HLFIR that is easier to optimize, because there are no compiler-generated operations with side effects between the call and its consumers. This allows more hlfir.eval_in_mem operations to reuse the LHS instead of allocating a temporary buffer.

I do not think the same can always be done for hlfir.copy_out, e.g.:
```
subroutine test2(x)
  interface
    function array_func2(x,y)
      real:: x(*), array_func2(10), y
    end function array_func2
  end interface
  real :: x(:)
  x = array_func2(x, 1.0)
end subroutine test2
```
If we postpone the copy-out until after the assignment, then the result may be wrong, because the copy-out would overwrite the newly assigned value of x with the contents of the copy-in temporary.
1 parent 53fe3df commit 5c3fa17

File tree

2 files changed

+121
-6
lines changed

2 files changed

+121
-6
lines changed

flang/lib/Lower/ConvertCall.cpp

Lines changed: 36 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -960,9 +960,26 @@ struct CallCleanUp {
960960
mlir::Value tempVar;
961961
mlir::Value mustFree;
962962
};
963-
void genCleanUp(mlir::Location loc, fir::FirOpBuilder &builder) {
964-
Fortran::common::visit([&](auto &c) { c.genCleanUp(loc, builder); },
963+
964+
/// Generate clean-up code.
965+
/// If \p postponeAssociates is true, the ExprAssociate clean-up
966+
/// is not generated, and instead the corresponding CallCleanUp
967+
/// object is returned as the result.
968+
std::optional<CallCleanUp> genCleanUp(mlir::Location loc,
969+
fir::FirOpBuilder &builder,
970+
bool postponeAssociates) {
971+
std::optional<CallCleanUp> postponed;
972+
Fortran::common::visit(Fortran::common::visitors{
973+
[&](CopyIn &c) { c.genCleanUp(loc, builder); },
974+
[&](ExprAssociate &c) {
975+
if (postponeAssociates)
976+
postponed = CallCleanUp{c};
977+
else
978+
c.genCleanUp(loc, builder);
979+
},
980+
},
965981
cleanUp);
982+
return postponed;
966983
}
967984
std::variant<CopyIn, ExprAssociate> cleanUp;
968985
};
@@ -1729,10 +1746,23 @@ genUserCall(Fortran::lower::PreparedActualArguments &loweredActuals,
17291746
caller, callSiteType, callContext.resultType,
17301747
callContext.isElementalProcWithArrayArgs());
17311748

1732-
/// Clean-up associations and copy-in.
1733-
for (auto cleanUp : callCleanUps)
1734-
cleanUp.genCleanUp(loc, builder);
1735-
1749+
// Clean-up associations and copy-in.
1750+
// The association clean-ups are postponed to the end of the statement
1751+
// lowering. The copy-in clean-ups may be delayed as well,
1752+
// but they are done immediately after the call currently.
1753+
llvm::SmallVector<CallCleanUp> associateCleanups;
1754+
for (auto cleanUp : callCleanUps) {
1755+
auto postponed =
1756+
cleanUp.genCleanUp(loc, builder, /*postponeAssociates=*/true);
1757+
if (postponed)
1758+
associateCleanups.push_back(*postponed);
1759+
}
1760+
1761+
fir::FirOpBuilder *bldr = &builder;
1762+
callContext.stmtCtx.attachCleanup([=]() {
1763+
for (auto cleanUp : associateCleanups)
1764+
(void)cleanUp.genCleanUp(loc, *bldr, /*postponeAssociates=*/false);
1765+
});
17361766
if (auto *entity = std::get_if<hlfir::EntityWithAttributes>(&loweredResult))
17371767
return *entity;
17381768

Lines changed: 85 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,85 @@
1+
! RUN: bbc -emit-hlfir -o - %s -I nowhere | FileCheck %s
2+
3+
subroutine test1
4+
interface
5+
function array_func1(x)
6+
real:: x, array_func1(10)
7+
end function array_func1
8+
end interface
9+
real :: x(10)
10+
x = array_func1(1.0)
11+
end subroutine test1
12+
! CHECK-LABEL: func.func @_QPtest1() {
13+
! CHECK: %[[VAL_5:.*]] = arith.constant 1.000000e+00 : f32
14+
! CHECK: %[[VAL_6:.*]]:3 = hlfir.associate %[[VAL_5]] {adapt.valuebyref} : (f32) -> (!fir.ref<f32>, !fir.ref<f32>, i1)
15+
! CHECK: %[[VAL_17:.*]] = hlfir.eval_in_mem shape %{{.*}} : (!fir.shape<1>) -> !hlfir.expr<10xf32> {
16+
! CHECK: fir.call @_QParray_func1
17+
! CHECK: fir.save_result
18+
! CHECK: }
19+
! CHECK: hlfir.assign %[[VAL_17]] to %{{.*}} : !hlfir.expr<10xf32>, !fir.ref<!fir.array<10xf32>>
20+
! CHECK: hlfir.end_associate %[[VAL_6]]#1, %[[VAL_6]]#2 : !fir.ref<f32>, i1
21+
22+
subroutine test2(x)
23+
interface
24+
function array_func2(x,y)
25+
real:: x(*), array_func2(10), y
26+
end function array_func2
27+
end interface
28+
real :: x(:)
29+
x = array_func2(x, 1.0)
30+
end subroutine test2
31+
! CHECK-LABEL: func.func @_QPtest2(
32+
! CHECK: %[[VAL_3:.*]] = arith.constant 1.000000e+00 : f32
33+
! CHECK: %[[VAL_4:.*]]:2 = hlfir.copy_in %{{.*}} to %{{.*}} : (!fir.box<!fir.array<?xf32>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>) -> (!fir.box<!fir.array<?xf32>>, i1)
34+
! CHECK: %[[VAL_5:.*]] = fir.box_addr %[[VAL_4]]#0 : (!fir.box<!fir.array<?xf32>>) -> !fir.ref<!fir.array<?xf32>>
35+
! CHECK: %[[VAL_6:.*]]:3 = hlfir.associate %[[VAL_3]] {adapt.valuebyref} : (f32) -> (!fir.ref<f32>, !fir.ref<f32>, i1)
36+
! CHECK: %[[VAL_17:.*]] = hlfir.eval_in_mem shape %{{.*}} : (!fir.shape<1>) -> !hlfir.expr<10xf32> {
37+
! CHECK: ^bb0(%[[VAL_18:.*]]: !fir.ref<!fir.array<10xf32>>):
38+
! CHECK: %[[VAL_19:.*]] = fir.call @_QParray_func2(%[[VAL_5]], %[[VAL_6]]#0) fastmath<contract> : (!fir.ref<!fir.array<?xf32>>, !fir.ref<f32>) -> !fir.array<10xf32>
39+
! CHECK: fir.save_result %[[VAL_19]] to %[[VAL_18]](%{{.*}}) : !fir.array<10xf32>, !fir.ref<!fir.array<10xf32>>, !fir.shape<1>
40+
! CHECK: }
41+
! CHECK: hlfir.copy_out %{{.*}}, %[[VAL_4]]#1 to %{{.*}} : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>, i1, !fir.box<!fir.array<?xf32>>) -> ()
42+
! CHECK: hlfir.assign %[[VAL_17]] to %{{.*}} : !hlfir.expr<10xf32>, !fir.box<!fir.array<?xf32>>
43+
! CHECK: hlfir.end_associate %[[VAL_6]]#1, %[[VAL_6]]#2 : !fir.ref<f32>, i1
44+
! CHECK: hlfir.destroy %[[VAL_17]] : !hlfir.expr<10xf32>
45+
46+
subroutine test3(x)
47+
interface
48+
function array_func3(x)
49+
real :: x, array_func3(10)
50+
end function array_func3
51+
end interface
52+
logical :: x
53+
if (any(array_func3(1.0).le.array_func3(2.0))) x = .true.
54+
end subroutine test3
55+
! CHECK-LABEL: func.func @_QPtest3(
56+
! CHECK: %[[VAL_2:.*]] = arith.constant 1.000000e+00 : f32
57+
! CHECK: %[[VAL_3:.*]]:3 = hlfir.associate %[[VAL_2]] {adapt.valuebyref} : (f32) -> (!fir.ref<f32>, !fir.ref<f32>, i1)
58+
! CHECK: %[[VAL_14:.*]] = hlfir.eval_in_mem shape %{{.*}} : (!fir.shape<1>) -> !hlfir.expr<10xf32> {
59+
! CHECK: ^bb0(%[[VAL_15:.*]]: !fir.ref<!fir.array<10xf32>>):
60+
! CHECK: %[[VAL_16:.*]] = fir.call @_QParray_func3(%[[VAL_3]]#0) fastmath<contract> : (!fir.ref<f32>) -> !fir.array<10xf32>
61+
! CHECK: fir.save_result %[[VAL_16]] to %[[VAL_15]](%{{.*}}) : !fir.array<10xf32>, !fir.ref<!fir.array<10xf32>>, !fir.shape<1>
62+
! CHECK: }
63+
! CHECK: %[[VAL_17:.*]] = arith.constant 2.000000e+00 : f32
64+
! CHECK: %[[VAL_18:.*]]:3 = hlfir.associate %[[VAL_17]] {adapt.valuebyref} : (f32) -> (!fir.ref<f32>, !fir.ref<f32>, i1)
65+
! CHECK: %[[VAL_29:.*]] = hlfir.eval_in_mem shape %{{.*}} : (!fir.shape<1>) -> !hlfir.expr<10xf32> {
66+
! CHECK: ^bb0(%[[VAL_30:.*]]: !fir.ref<!fir.array<10xf32>>):
67+
! CHECK: %[[VAL_31:.*]] = fir.call @_QParray_func3(%[[VAL_18]]#0) fastmath<contract> : (!fir.ref<f32>) -> !fir.array<10xf32>
68+
! CHECK: fir.save_result %[[VAL_31]] to %[[VAL_30]](%{{.*}}) : !fir.array<10xf32>, !fir.ref<!fir.array<10xf32>>, !fir.shape<1>
69+
! CHECK: }
70+
! CHECK: %[[VAL_32:.*]] = hlfir.elemental %{{.*}} unordered : (!fir.shape<1>) -> !hlfir.expr<?x!fir.logical<4>> {
71+
! CHECK: ^bb0(%[[VAL_33:.*]]: index):
72+
! CHECK: %[[VAL_34:.*]] = hlfir.apply %[[VAL_14]], %[[VAL_33]] : (!hlfir.expr<10xf32>, index) -> f32
73+
! CHECK: %[[VAL_35:.*]] = hlfir.apply %[[VAL_29]], %[[VAL_33]] : (!hlfir.expr<10xf32>, index) -> f32
74+
! CHECK: %[[VAL_36:.*]] = arith.cmpf ole, %[[VAL_34]], %[[VAL_35]] fastmath<contract> : f32
75+
! CHECK: %[[VAL_37:.*]] = fir.convert %[[VAL_36]] : (i1) -> !fir.logical<4>
76+
! CHECK: hlfir.yield_element %[[VAL_37]] : !fir.logical<4>
77+
! CHECK: }
78+
! CHECK: %[[VAL_38:.*]] = hlfir.any %[[VAL_32]] : (!hlfir.expr<?x!fir.logical<4>>) -> !fir.logical<4>
79+
! CHECK: hlfir.destroy %[[VAL_32]] : !hlfir.expr<?x!fir.logical<4>>
80+
! CHECK: hlfir.end_associate %[[VAL_18]]#1, %[[VAL_18]]#2 : !fir.ref<f32>, i1
81+
! CHECK: hlfir.destroy %[[VAL_29]] : !hlfir.expr<10xf32>
82+
! CHECK: hlfir.end_associate %[[VAL_3]]#1, %[[VAL_3]]#2 : !fir.ref<f32>, i1
83+
! CHECK: hlfir.destroy %[[VAL_14]] : !hlfir.expr<10xf32>
84+
! CHECK: %[[VAL_39:.*]] = fir.convert %[[VAL_38]] : (!fir.logical<4>) -> i1
85+
! CHECK: fir.if %[[VAL_39]] {

0 commit comments

Comments
 (0)