diff --git a/flang/lib/Optimizer/HLFIR/Transforms/OptimizedBufferization.cpp b/flang/lib/Optimizer/HLFIR/Transforms/OptimizedBufferization.cpp index 91df8672c20d9..54892ef99bf58 100644 --- a/flang/lib/Optimizer/HLFIR/Transforms/OptimizedBufferization.cpp +++ b/flang/lib/Optimizer/HLFIR/Transforms/OptimizedBufferization.cpp @@ -21,6 +21,7 @@ #include "flang/Optimizer/HLFIR/HLFIROps.h" #include "flang/Optimizer/HLFIR/Passes.h" #include "flang/Optimizer/OpenMP/Passes.h" +#include "flang/Optimizer/Support/Utils.h" #include "flang/Optimizer/Transforms/Utils.h" #include "mlir/Dialect/Func/IR/FuncOps.h" #include "mlir/IR/Dominance.h" @@ -786,13 +787,55 @@ llvm::LogicalResult BroadcastAssignBufferization::matchAndRewrite( mlir::Value shape = hlfir::genShape(loc, builder, lhs); llvm::SmallVector extents = hlfir::getIndexExtents(loc, builder, shape); - hlfir::LoopNest loopNest = - hlfir::genLoopNest(loc, builder, extents, /*isUnordered=*/true, - flangomp::shouldUseWorkshareLowering(assign)); - builder.setInsertionPointToStart(loopNest.body); - auto arrayElement = - hlfir::getElementAt(loc, builder, lhs, loopNest.oneBasedIndices); - builder.create(loc, rhs, arrayElement); + + if (lhs.isSimplyContiguous() && extents.size() > 1) { + // Flatten the array to use a single assign loop, that can be better + // optimized. + mlir::Value n = extents[0]; + for (size_t i = 1; i < extents.size(); ++i) + n = builder.create(loc, n, extents[i]); + llvm::SmallVector flatExtents = {n}; + + mlir::Type flatArrayType; + mlir::Value flatArray = lhs.getBase(); + if (mlir::isa(lhs.getType())) { + shape = builder.genShape(loc, flatExtents); + flatArrayType = fir::BoxType::get(fir::SequenceType::get(eleTy, 1)); + flatArray = builder.create(loc, flatArrayType, flatArray, + shape, /*slice=*/mlir::Value{}); + } else { + // Array references must have fixed shape, when used in assignments. + int64_t flatExtent = 1; + for (const mlir::Value &extent : extents) { + mlir::Operation *op = extent.getDefiningOp(); + assert(op && "no defining operation for constant array extent"); + flatExtent *= fir::toInt(mlir::cast(*op)); + } + + flatArrayType = + fir::ReferenceType::get(fir::SequenceType::get({flatExtent}, eleTy)); + flatArray = builder.createConvert(loc, flatArrayType, flatArray); + } + + hlfir::LoopNest loopNest = + hlfir::genLoopNest(loc, builder, flatExtents, /*isUnordered=*/true, + flangomp::shouldUseWorkshareLowering(assign)); + builder.setInsertionPointToStart(loopNest.body); + + mlir::Value arrayElement = + builder.create(loc, fir::ReferenceType::get(eleTy), + flatArray, loopNest.oneBasedIndices); + builder.create(loc, rhs, arrayElement); + } else { + hlfir::LoopNest loopNest = + hlfir::genLoopNest(loc, builder, extents, /*isUnordered=*/true, + flangomp::shouldUseWorkshareLowering(assign)); + builder.setInsertionPointToStart(loopNest.body); + auto arrayElement = + hlfir::getElementAt(loc, builder, lhs, loopNest.oneBasedIndices); + builder.create(loc, rhs, arrayElement); + } + rewriter.eraseOp(assign); return mlir::success(); } diff --git a/flang/test/HLFIR/opt-scalar-assign.fir b/flang/test/HLFIR/opt-scalar-assign.fir index 02ab02945b042..74cdcd9622adb 100644 --- a/flang/test/HLFIR/opt-scalar-assign.fir +++ b/flang/test/HLFIR/opt-scalar-assign.fir @@ -12,18 +12,18 @@ func.func @_QPtest1() { return } // CHECK-LABEL: func.func @_QPtest1() { -// CHECK: %[[VAL_0:.*]] = arith.constant 1 : index -// CHECK: %[[VAL_1:.*]] = arith.constant 0.000000e+00 : f32 -// CHECK: %[[VAL_2:.*]] = arith.constant 11 : index -// CHECK: %[[VAL_3:.*]] = arith.constant 13 : index -// CHECK: %[[VAL_4:.*]] = fir.alloca !fir.array<11x13xf32> {bindc_name = "x", uniq_name = "_QFtest1Ex"} -// CHECK: %[[VAL_5:.*]] = fir.shape %[[VAL_2]], %[[VAL_3]] : (index, index) -> !fir.shape<2> -// CHECK: %[[VAL_6:.*]]:2 = hlfir.declare %[[VAL_4]](%[[VAL_5]]) {uniq_name = "_QFtest1Ex"} : (!fir.ref>, !fir.shape<2>) -> (!fir.ref>, !fir.ref>) -// CHECK: fir.do_loop %[[VAL_7:.*]] = %[[VAL_0]] to %[[VAL_3]] step %[[VAL_0]] unordered { -// CHECK: fir.do_loop %[[VAL_8:.*]] = %[[VAL_0]] to %[[VAL_2]] step %[[VAL_0]] unordered { -// CHECK: %[[VAL_9:.*]] = hlfir.designate %[[VAL_6]]#0 (%[[VAL_8]], %[[VAL_7]]) : (!fir.ref>, index, index) -> !fir.ref -// CHECK: hlfir.assign %[[VAL_1]] to %[[VAL_9]] : f32, !fir.ref -// CHECK: } +// CHECK: %[[VAL_0:.*]] = arith.constant 143 : index +// CHECK: %[[VAL_1:.*]] = arith.constant 1 : index +// CHECK: %[[VAL_2:.*]] = arith.constant 0.000000e+00 : f32 +// CHECK: %[[VAL_3:.*]] = arith.constant 11 : index +// CHECK: %[[VAL_4:.*]] = arith.constant 13 : index +// CHECK: %[[VAL_5:.*]] = fir.alloca !fir.array<11x13xf32> {bindc_name = "x", uniq_name = "_QFtest1Ex"} +// CHECK: %[[VAL_6:.*]] = fir.shape %[[VAL_3]], %[[VAL_4]] : (index, index) -> !fir.shape<2> +// CHECK: %[[VAL_7:.*]]:2 = hlfir.declare %[[VAL_5]](%[[VAL_6]]) {uniq_name = "_QFtest1Ex"} : (!fir.ref>, !fir.shape<2>) -> (!fir.ref>, !fir.ref>) +// CHECK: %[[VAL_8:.*]] = fir.convert %[[VAL_7]]#0 : (!fir.ref>) -> !fir.ref> +// CHECK: fir.do_loop %[[VAL_9:.*]] = %[[VAL_1]] to %[[VAL_0]] step %[[VAL_1]] unordered { +// CHECK: %[[VAL_10:.*]] = hlfir.designate %[[VAL_8]] (%[[VAL_9]]) : (!fir.ref>, index) -> !fir.ref +// CHECK: hlfir.assign %[[VAL_2]] to %[[VAL_10]] : f32, !fir.ref // CHECK: } // CHECK: return // CHECK: } @@ -129,3 +129,29 @@ func.func @_QPtest5(%arg0: !fir.ref>> {fir.bindc_name // CHECK: } // CHECK: return // CHECK: } + +func.func @_QPtest6(%arg0: !fir.ref>>> {fir.bindc_name = "x"}) { + %c0_i32 = arith.constant 0 : i32 + %0:2 = hlfir.declare %arg0 {fortran_attrs = #fir.var_attrs, uniq_name = "_QFtest6Ex"} : (!fir.ref>>>) -> (!fir.ref>>>, !fir.ref>>>) + hlfir.assign %c0_i32 to %0#0 realloc : i32, !fir.ref>>> + return +} + +// CHECK-LABEL: func.func @_QPtest6( +// CHECK-SAME: %[[VAL_0:.*]]: !fir.ref>>> {fir.bindc_name = "x"}) { +// CHECK: %[[VAL_1:.*]] = arith.constant 1 : index +// CHECK: %[[VAL_2:.*]] = arith.constant 0 : index +// CHECK: %[[VAL_3:.*]] = arith.constant 0 : i32 +// CHECK: %[[VAL_4:.*]]:2 = hlfir.declare %[[VAL_0]] {fortran_attrs = #fir.var_attrs, uniq_name = "_QFtest6Ex"} : (!fir.ref>>>) -> (!fir.ref>>>, !fir.ref>>>) +// CHECK: %[[VAL_5:.*]] = fir.load %[[VAL_4]]#0 : !fir.ref>>> +// CHECK: %[[VAL_6:.*]]:3 = fir.box_dims %[[VAL_5]], %[[VAL_2]] : (!fir.box>>, index) -> (index, index, index) +// CHECK: %[[VAL_7:.*]]:3 = fir.box_dims %[[VAL_5]], %[[VAL_1]] : (!fir.box>>, index) -> (index, index, index) +// CHECK: %[[VAL_8:.*]] = arith.muli %[[VAL_6]]#1, %[[VAL_7]]#1 : index +// CHECK: %[[VAL_9:.*]] = fir.shape %[[VAL_8]] : (index) -> !fir.shape<1> +// CHECK: %[[VAL_10:.*]] = fir.rebox %[[VAL_5]](%[[VAL_9]]) : (!fir.box>>, !fir.shape<1>) -> !fir.box> +// CHECK: fir.do_loop %[[VAL_11:.*]] = %[[VAL_1]] to %[[VAL_8]] step %[[VAL_1]] unordered { +// CHECK: %[[VAL_12:.*]] = hlfir.designate %[[VAL_10]] (%[[VAL_11]]) : (!fir.box>, index) -> !fir.ref +// CHECK: hlfir.assign %[[VAL_3]] to %[[VAL_12]] : i32, !fir.ref +// CHECK: } +// CHECK: return +// CHECK: }