Skip to content

Commit f857bef

Browse files
authored
[flang][hlfir] Shallow copy elemental results with allocatable components. (llvm#68040)
To avoid the overhead of deallocating the allocatable components of the elemental temporary result on every iteration of the elemental operation, we can use a shallow copy instead of a deep-copy assignment.
1 parent e0cd781 commit f857bef

File tree

2 files changed

+60
-20
lines changed

2 files changed

+60
-20
lines changed

flang/lib/Optimizer/HLFIR/Transforms/BufferizeHLFIR.cpp

Lines changed: 29 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -791,26 +791,35 @@ struct ElementalOpConversion
791791
// Assign the element value to the temp element for this iteration.
792792
auto tempElement =
793793
hlfir::getElementAt(loc, builder, temp, loopNest.oneBasedIndices);
794-
// FIXME: if the elemental result is a function result temporary
795-
// of a derived type, we have to make sure that we are either
796-
// deallocate any allocatable/automatic components after the assignment
797-
// or that we do not do the deep copy with the AssignOp. The latter
798-
// seems to be preferrable, because the deep copy is more expensive.
799-
// The shallow copy may be done with a load/store of the RecordType scalar.
800-
builder.create<hlfir::AssignOp>(loc, elementValue, tempElement,
801-
/*realloc=*/false,
802-
/*keep_lhs_length_if_realloc=*/false,
803-
/*temporary_lhs=*/true);
804-
// hlfir.yield_element implicitly marks the end-of-life its operand if
805-
// it is an expression created in the hlfir.elemental (since it is its
806-
// last use and an hlfir.destroy could not be created afterwards)
807-
// Now that this node has been removed and the expression has been used in
808-
// the assign, insert an hlfir.destroy to mark the expression end-of-life.
809-
// If the expression creation allocated a buffer on the heap inside the
810-
// loop, this will ensure the buffer properly deallocated.
811-
if (elementValue.getType().isa<hlfir::ExprType>() &&
812-
wasCreatedInCurrentBlock(elementValue, builder))
813-
builder.create<hlfir::DestroyOp>(loc, elementValue);
794+
// If the elemental result is a temporary of a derived type,
795+
// we can avoid the deep copy implied by the AssignOp and just
796+
// do the shallow copy with load/store. This helps avoid the overhead
797+
// of deallocating allocatable components of the temporary (if any)
798+
// on each iteration of the elemental operation.
799+
auto asExpr = elementValue.getDefiningOp<hlfir::AsExprOp>();
800+
auto elemType = hlfir::getFortranElementType(elementValue.getType());
801+
if (asExpr && asExpr.isMove() && mlir::isa<fir::RecordType>(elemType) &&
802+
hlfir::mayHaveAllocatableComponent(elemType) &&
803+
wasCreatedInCurrentBlock(elementValue, builder)) {
804+
auto load = builder.create<fir::LoadOp>(loc, asExpr.getVar());
805+
builder.create<fir::StoreOp>(loc, load, tempElement);
806+
} else {
807+
builder.create<hlfir::AssignOp>(loc, elementValue, tempElement,
808+
/*realloc=*/false,
809+
/*keep_lhs_length_if_realloc=*/false,
810+
/*temporary_lhs=*/true);
811+
812+
// hlfir.yield_element implicitly marks the end-of-life of its operand if
813+
// it is an expression created in the hlfir.elemental (since it is its
814+
// last use and an hlfir.destroy could not be created afterwards)
815+
// Now that this node has been removed and the expression has been used in
816+
// the assign, insert an hlfir.destroy to mark the expression end-of-life.
817+
// If the expression creation allocated a buffer on the heap inside the
818+
// loop, this will ensure the buffer is properly deallocated.
819+
if (elementValue.getType().isa<hlfir::ExprType>() &&
820+
wasCreatedInCurrentBlock(elementValue, builder))
821+
builder.create<hlfir::DestroyOp>(loc, elementValue);
822+
}
814823
builder.restoreInsertionPoint(insPt);
815824

816825
mlir::Value bufferizedExpr =
Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
// Check that an elemental result of a derived type with an allocatable
2+
// component is shallow-copied into the array result.
3+
// RUN: fir-opt %s --bufferize-hlfir | FileCheck %s
4+
5+
func.func @_QMtypesPtest() {
6+
%false = arith.constant false
7+
%c1 = arith.constant 1 : index
8+
%0 = fir.alloca !fir.type<_QMtypesTt{x:!fir.box<!fir.heap<f32>>}> {bindc_name = ".result"}
9+
%11 = fir.shape %c1 : (index) -> !fir.shape<1>
10+
%18 = fir.alloca !fir.array<1x!fir.type<_QMtypesTt{x:!fir.box<!fir.heap<f32>>}>> {bindc_name = "y", uniq_name = "_QMtypesFtestEy"}
11+
%19:2 = hlfir.declare %18(%11) {uniq_name = "_QMtypesFtestEy"} : (!fir.ref<!fir.array<1x!fir.type<_QMtypesTt{x:!fir.box<!fir.heap<f32>>}>>>, !fir.shape<1>) -> (!fir.ref<!fir.array<1x!fir.type<_QMtypesTt{x:!fir.box<!fir.heap<f32>>}>>>, !fir.ref<!fir.array<1x!fir.type<_QMtypesTt{x:!fir.box<!fir.heap<f32>>}>>>)
12+
%23 = hlfir.elemental %11 : (!fir.shape<1>) -> !hlfir.expr<1x!fir.type<_QMtypesTt{x:!fir.box<!fir.heap<f32>>}>> {
13+
^bb0(%arg0: index):
14+
%26:2 = hlfir.declare %0 {uniq_name = ".tmp.func_result"} : (!fir.ref<!fir.type<_QMtypesTt{x:!fir.box<!fir.heap<f32>>}>>) -> (!fir.ref<!fir.type<_QMtypesTt{x:!fir.box<!fir.heap<f32>>}>>, !fir.ref<!fir.type<_QMtypesTt{x:!fir.box<!fir.heap<f32>>}>>)
15+
%27 = hlfir.as_expr %26#0 move %false : (!fir.ref<!fir.type<_QMtypesTt{x:!fir.box<!fir.heap<f32>>}>>, i1) -> !hlfir.expr<!fir.type<_QMtypesTt{x:!fir.box<!fir.heap<f32>>}>>
16+
hlfir.yield_element %27 : !hlfir.expr<!fir.type<_QMtypesTt{x:!fir.box<!fir.heap<f32>>}>>
17+
}
18+
hlfir.assign %23 to %19#0 : !hlfir.expr<1x!fir.type<_QMtypesTt{x:!fir.box<!fir.heap<f32>>}>>, !fir.ref<!fir.array<1x!fir.type<_QMtypesTt{x:!fir.box<!fir.heap<f32>>}>>>
19+
hlfir.destroy %23 : !hlfir.expr<1x!fir.type<_QMtypesTt{x:!fir.box<!fir.heap<f32>>}>>
20+
return
21+
}
22+
// CHECK-LABEL: func.func @_QMtypesPtest() {
23+
// CHECK: %[[VAL_2:.*]] = fir.alloca !fir.type<_QMtypesTt{x:!fir.box<!fir.heap<f32>>}> {bindc_name = ".result"}
24+
// CHECK: %[[VAL_6:.*]] = fir.allocmem !fir.array<1x!fir.type<_QMtypesTt{x:!fir.box<!fir.heap<f32>>}>> {bindc_name = ".tmp.array", uniq_name = ""}
25+
// CHECK: %[[VAL_7:.*]]:2 = hlfir.declare %[[VAL_6]](%{{.*}}) {uniq_name = ".tmp.array"} : (!fir.heap<!fir.array<1x!fir.type<_QMtypesTt{x:!fir.box<!fir.heap<f32>>}>>>, !fir.shape<1>) -> (!fir.heap<!fir.array<1x!fir.type<_QMtypesTt{x:!fir.box<!fir.heap<f32>>}>>>, !fir.heap<!fir.array<1x!fir.type<_QMtypesTt{x:!fir.box<!fir.heap<f32>>}>>>)
26+
// CHECK: fir.do_loop %[[VAL_10:.*]] = %{{.*}} to %{{.*}} step %{{.*}} {
27+
// CHECK: %[[VAL_11:.*]]:2 = hlfir.declare %[[VAL_2]] {uniq_name = ".tmp.func_result"} : (!fir.ref<!fir.type<_QMtypesTt{x:!fir.box<!fir.heap<f32>>}>>) -> (!fir.ref<!fir.type<_QMtypesTt{x:!fir.box<!fir.heap<f32>>}>>, !fir.ref<!fir.type<_QMtypesTt{x:!fir.box<!fir.heap<f32>>}>>)
28+
// CHECK: %[[VAL_15:.*]] = hlfir.designate %[[VAL_7]]#0 (%[[VAL_10]]) : (!fir.heap<!fir.array<1x!fir.type<_QMtypesTt{x:!fir.box<!fir.heap<f32>>}>>>, index) -> !fir.ref<!fir.type<_QMtypesTt{x:!fir.box<!fir.heap<f32>>}>>
29+
// CHECK: %[[VAL_16:.*]] = fir.load %[[VAL_11]]#0 : !fir.ref<!fir.type<_QMtypesTt{x:!fir.box<!fir.heap<f32>>}>>
30+
// CHECK: fir.store %[[VAL_16]] to %[[VAL_15]] : !fir.ref<!fir.type<_QMtypesTt{x:!fir.box<!fir.heap<f32>>}>>
31+
// CHECK: }

0 commit comments

Comments
 (0)