From 78206935db8c87bb9e2c89da3574b933406f112e Mon Sep 17 00:00:00 2001 From: Kajetan Puchalski Date: Thu, 10 Apr 2025 14:04:52 +0000 Subject: [PATCH 1/7] [flang] Inline hlfir.copy_in for trivial types hlfir.copy_in copies non-contiguous array slices for functions that require their array arguments to be contiguous. It is implemented through a flang-rt function that calls memcpy/memmove separately on each element. For large arrays of trivial types, this can incur considerable overhead compared to a plain copy loop that is better able to take advantage of hardware pipelines. To address that, extend the InlineHLFIRAssign optimisation pass with a new pattern for inlining hlfir.copy_in operations for trivial types. For the time being, the pattern is only applied in cases where the copy-in does not require a corresponding copy-out, such as when the function being called declares the array parameter as intent(in). Applying this optimisation reduces the runtime of thornado-mini's DeleptonizationProblem by about one third. 
Signed-off-by: Kajetan Puchalski --- .../HLFIR/Transforms/InlineHLFIRAssign.cpp | 117 ++++++++++++++++++ 1 file changed, 117 insertions(+) diff --git a/flang/lib/Optimizer/HLFIR/Transforms/InlineHLFIRAssign.cpp b/flang/lib/Optimizer/HLFIR/Transforms/InlineHLFIRAssign.cpp index 6e209cce07ad4..38c684eaceb7d 100644 --- a/flang/lib/Optimizer/HLFIR/Transforms/InlineHLFIRAssign.cpp +++ b/flang/lib/Optimizer/HLFIR/Transforms/InlineHLFIRAssign.cpp @@ -13,6 +13,7 @@ #include "flang/Optimizer/Analysis/AliasAnalysis.h" #include "flang/Optimizer/Builder/FIRBuilder.h" #include "flang/Optimizer/Builder/HLFIRTools.h" +#include "flang/Optimizer/Dialect/FIRType.h" #include "flang/Optimizer/HLFIR/HLFIROps.h" #include "flang/Optimizer/HLFIR/Passes.h" #include "flang/Optimizer/OpenMP/Passes.h" @@ -127,6 +128,121 @@ class InlineHLFIRAssignConversion } }; +class InlineCopyInConversion : public mlir::OpRewritePattern { +public: + using mlir::OpRewritePattern::OpRewritePattern; + + llvm::LogicalResult + matchAndRewrite(hlfir::CopyInOp copyIn, + mlir::PatternRewriter &rewriter) const override; +}; + +llvm::LogicalResult +InlineCopyInConversion::matchAndRewrite(hlfir::CopyInOp copyIn, + mlir::PatternRewriter &rewriter) const { + fir::FirOpBuilder builder(rewriter, copyIn.getOperation()); + mlir::Location loc = copyIn.getLoc(); + hlfir::Entity inputVariable{copyIn.getVar()}; + if (!fir::isa_trivial(inputVariable.getFortranElementType())) + return rewriter.notifyMatchFailure(copyIn, + "CopyInOp's data type is not trivial"); + + if (fir::isPointerType(inputVariable.getType())) + return rewriter.notifyMatchFailure( + copyIn, "CopyInOp's input variable is a pointer"); + + // There should be exactly one user of WasCopied - the corresponding + // CopyOutOp. + if (copyIn.getWasCopied().getUses().empty()) + return rewriter.notifyMatchFailure(copyIn, + "CopyInOp's WasCopied has no uses"); + // The copy out should always be present, either to actually copy or just + // deallocate memory. 
+ auto *copyOut = + copyIn.getWasCopied().getUsers().begin().getCurrent().getUser(); + + if (!mlir::isa(copyOut)) + return rewriter.notifyMatchFailure(copyIn, + "CopyInOp has no direct CopyOut"); + + // Only inline the copy_in when copy_out does not need to be done, i.e. in + // case of intent(in). + if (::llvm::cast(copyOut).getVar()) + return rewriter.notifyMatchFailure(copyIn, "CopyIn needs a copy-out"); + + inputVariable = + hlfir::derefPointersAndAllocatables(loc, builder, inputVariable); + mlir::Type resultAddrType = copyIn.getCopiedIn().getType(); + mlir::Value isContiguous = + builder.create(loc, inputVariable); + auto results = + builder + .genIfOp(loc, {resultAddrType, builder.getI1Type()}, isContiguous, + /*withElseRegion=*/true) + .genThen([&]() { + mlir::Value falseVal = builder.create( + loc, builder.getI1Type(), builder.getBoolAttr(false)); + builder.create( + loc, mlir::ValueRange{inputVariable, falseVal}); + }) + .genElse([&] { + auto [temp, cleanup] = + hlfir::createTempFromMold(loc, builder, inputVariable); + mlir::Value shape = hlfir::genShape(loc, builder, inputVariable); + llvm::SmallVector extents = + hlfir::getIndexExtents(loc, builder, shape); + hlfir::LoopNest loopNest = hlfir::genLoopNest( + loc, builder, extents, /*isUnordered=*/true, + flangomp::shouldUseWorkshareLowering(copyIn)); + builder.setInsertionPointToStart(loopNest.body); + auto elem = hlfir::getElementAt(loc, builder, inputVariable, + loopNest.oneBasedIndices); + elem = hlfir::loadTrivialScalar(loc, builder, elem); + auto tempElem = hlfir::getElementAt(loc, builder, temp, + loopNest.oneBasedIndices); + builder.create(loc, elem, tempElem); + builder.setInsertionPointAfter(loopNest.outerOp); + + mlir::Value result; + // Make sure the result is always a boxed array by boxing it + // ourselves if need be. 
+ if (mlir::isa(temp.getType())) { + result = temp; + } else { + auto refTy = + fir::ReferenceType::get(temp.getElementOrSequenceType()); + auto refVal = builder.createConvert(loc, refTy, temp); + result = + builder.create(loc, resultAddrType, refVal); + } + + builder.create(loc, + mlir::ValueRange{result, cleanup}); + }) + .getResults(); + + auto addr = results[0]; + auto needsCleanup = results[1]; + + builder.setInsertionPoint(copyOut); + builder.genIfOp(loc, {}, needsCleanup, false).genThen([&] { + auto boxAddr = builder.create(loc, addr); + auto heapType = fir::HeapType::get(fir::BoxValue(addr).getBaseTy()); + auto heapVal = builder.createConvert(loc, heapType, boxAddr.getResult()); + builder.create(loc, heapVal); + }); + rewriter.eraseOp(copyOut); + + auto tempBox = copyIn.getTempBox(); + + rewriter.replaceOp(copyIn, {addr, builder.genNot(loc, isContiguous)}); + + // The TempBox is only needed for flang-rt calls which we're no longer + // generating. + rewriter.eraseOp(tempBox.getDefiningOp()); + return mlir::success(); +} + class InlineHLFIRAssignPass : public hlfir::impl::InlineHLFIRAssignBase { public: @@ -140,6 +256,7 @@ class InlineHLFIRAssignPass mlir::RewritePatternSet patterns(context); patterns.insert(context); + patterns.insert(context); if (mlir::failed(mlir::applyPatternsGreedily( getOperation(), std::move(patterns), config))) { From 154b758b0da725c0d0f9b41cdc3713a05e2239a7 Mon Sep 17 00:00:00 2001 From: Kajetan Puchalski Date: Wed, 7 May 2025 16:04:07 +0000 Subject: [PATCH 2/7] Add tests Signed-off-by: Kajetan Puchalski --- flang/test/HLFIR/inline-hlfir-assign.fir | 144 +++++++++++++++++++++++ 1 file changed, 144 insertions(+) diff --git a/flang/test/HLFIR/inline-hlfir-assign.fir b/flang/test/HLFIR/inline-hlfir-assign.fir index f834e7971e3d5..df7681b9c5c16 100644 --- a/flang/test/HLFIR/inline-hlfir-assign.fir +++ b/flang/test/HLFIR/inline-hlfir-assign.fir @@ -353,3 +353,147 @@ func.func @_QPtest_expr_rhs(%arg0: !fir.ref> // CHECK: return // CHECK: 
} + +// Test inlining of hlfir.copy_in that does not require the array to be copied out +func.func private @_test_inline_copy_in(%arg0: !fir.box> {fir.bindc_name = "x"}, %arg1: !fir.ref {fir.bindc_name = "i"}, %arg2: !fir.ref {fir.bindc_name = "j"}) { + %0 = fir.alloca !fir.box>> + %1 = fir.dummy_scope : !fir.dscope + %2:2 = hlfir.declare %arg1 dummy_scope %1 {uniq_name = "_QFFsb2Ei"} : (!fir.ref, !fir.dscope) -> (!fir.ref, !fir.ref) + %3:2 = hlfir.declare %arg2 dummy_scope %1 {uniq_name = "_QFFsb2Ej"} : (!fir.ref, !fir.dscope) -> (!fir.ref, !fir.ref) + %4:2 = hlfir.declare %arg0 dummy_scope %1 {uniq_name = "_QFFsb2Ex"} : (!fir.box>, !fir.dscope) -> (!fir.box>, !fir.box>) + %5 = fir.load %2#0 : !fir.ref + %6 = fir.convert %5 : (i32) -> i64 + %c1 = arith.constant 1 : index + %c1_0 = arith.constant 1 : index + %7:3 = fir.box_dims %4#1, %c1_0 : (!fir.box>, index) -> (index, index, index) + %c1_1 = arith.constant 1 : index + %c0 = arith.constant 0 : index + %8 = arith.subi %7#1, %c1 : index + %9 = arith.addi %8, %c1_1 : index + %10 = arith.divsi %9, %c1_1 : index + %11 = arith.cmpi sgt, %10, %c0 : index + %12 = arith.select %11, %10, %c0 : index + %13 = fir.load %3#0 : !fir.ref + %14 = fir.convert %13 : (i32) -> i64 + %15 = fir.shape %12 : (index) -> !fir.shape<1> + %16 = hlfir.designate %4#0 (%6, %c1:%7#1:%c1_1, %14) shape %15 : (!fir.box>, i64, index, index, index, i64, !fir.shape<1>) -> !fir.box> + %c100_i32 = arith.constant 100 : i32 + %17:2 = hlfir.copy_in %16 to %0 : (!fir.box>, !fir.ref>>>) -> (!fir.box>, i1) + %18 = fir.box_addr %17#0 : (!fir.box>) -> !fir.ref> + %19:3 = hlfir.associate %c100_i32 {adapt.valuebyref} : (i32) -> (!fir.ref, !fir.ref, i1) + fir.call @_QFPsb(%18, %19#0) fastmath : (!fir.ref>, !fir.ref) -> () + hlfir.copy_out %0, %17#1 : (!fir.ref>>>, i1) -> () + hlfir.end_associate %19#1, %19#2 : !fir.ref, i1 + return +} + +// CHECK-LABEL: func.func private @_test_inline_copy_in( +// CHECK-SAME: %[[VAL_0:.*]]: !fir.box> {fir.bindc_name = "x"}, +// 
CHECK-SAME: %[[VAL_1:.*]]: !fir.ref {fir.bindc_name = "i"}, +// CHECK-SAME: %[[VAL_2:.*]]: !fir.ref {fir.bindc_name = "j"}) { +// CHECK: %[[VAL_3:.*]] = arith.constant true +// CHECK: %[[VAL_4:.*]] = arith.constant false +// CHECK: %[[VAL_5:.*]] = arith.constant 100 : i32 +// CHECK: %[[VAL_6:.*]] = arith.constant 0 : index +// CHECK: %[[VAL_7:.*]] = arith.constant 1 : index +// CHECK: %[[VAL_8:.*]] = fir.dummy_scope : !fir.dscope +// CHECK: %[[VAL_22:.*]]:2 = hlfir.declare %[[VAL_1:.*]] dummy_scope %[[VAL_8:.*]] {uniq_name = "_QFFsb2Ei"} : (!fir.ref, !fir.dscope) -> (!fir.ref, !fir.ref) +// CHECK: %[[VAL_9:.*]]:2 = hlfir.declare %[[VAL_2:.*]] dummy_scope %[[VAL_8:.*]] {uniq_name = "_QFFsb2Ej"} : (!fir.ref, !fir.dscope) -> (!fir.ref, !fir.ref) +// CHECK: %[[VAL_10:.*]]:2 = hlfir.declare %[[VAL_0:.*]] dummy_scope %[[VAL_8:.*]] {uniq_name = "_QFFsb2Ex"} : (!fir.box>, !fir.dscope) -> (!fir.box>, !fir.box>) +// CHECK: %[[VAL_11:.*]] = fir.load %[[VAL_22:.*]]#0 : !fir.ref +// CHECK: %[[VAL_12:.*]] = fir.convert %[[VAL_11:.*]] : (i32) -> i64 +// CHECK: %[[VAL_13:.*]]:3 = fir.box_dims %[[VAL_10:.*]]#1, %[[VAL_7:.*]] : (!fir.box>, index) -> (index, index, index) +// CHECK: %[[VAL_14:.*]] = arith.cmpi sgt, %[[VAL_13:.*]]#1, %[[VAL_6:.*]] : index +// CHECK: %[[VAL_15:.*]] = arith.select %[[VAL_14:.*]], %[[VAL_13:.*]]#1, %[[VAL_6:.*]] : index +// CHECK: %[[VAL_16:.*]] = fir.load %[[VAL_9:.*]]#0 : !fir.ref +// CHECK: %[[VAL_17:.*]] = fir.convert %[[VAL_16:.*]] : (i32) -> i64 +// CHECK: %[[VAL_18:.*]] = fir.shape %[[VAL_15:.*]] : (index) -> !fir.shape<1> +// CHECK: %[[VAL_19:.*]] = hlfir.designate %[[VAL_10:.*]]#0 (%[[VAL_12:.*]], %[[VAL_7:.*]]:%[[VAL_13:.*]]#1:%[[VAL_7:.*]], %[[VAL_17:.*]]) shape %[[VAL_18:.*]] : (!fir.box>, i64, index, index, index, i64, !fir.shape<1>) -> !fir.box> +// CHECK: %[[VAL_20:.*]] = fir.is_contiguous_box %[[VAL_19:.*]] whole : (!fir.box>) -> i1 +// CHECK: %[[VAL_21:.*]]:2 = fir.if %[[VAL_20:.*]] -> (!fir.box>, i1) { +// CHECK: fir.result 
%[[VAL_19:.*]], %[[VAL_4:.*]] : !fir.box>, i1 +// CHECK: } else { +// CHECK: %[[VAL_24:.*]] = fir.allocmem !fir.array, %[[VAL_15:.*]] {bindc_name = ".tmp", uniq_name = ""} +// CHECK: %[[VAL_25:.*]]:2 = hlfir.declare %[[VAL_24:.*]](%[[VAL_18:.*]]) {uniq_name = ".tmp"} : (!fir.heap>, !fir.shape<1>) -> (!fir.box>, !fir.heap>) +// CHECK: fir.do_loop %arg3 = %[[VAL_7:.*]] to %[[VAL_15:.*]] step %[[VAL_7:.*]] unordered { +// CHECK: %[[VAL_26:.*]] = hlfir.designate %[[VAL_19:.*]] (%arg3) : (!fir.box>, index) -> !fir.ref +// CHECK: %[[VAL_27:.*]] = fir.load %[[VAL_26:.*]] : !fir.ref +// CHECK: %[[VAL_28:.*]] = hlfir.designate %[[VAL_25:.*]]#0 (%arg3) : (!fir.box>, index) -> !fir.ref +// CHECK: hlfir.assign %[[VAL_27:.*]] to %[[VAL_28:.*]] : f64, !fir.ref +// CHECK: } +// CHECK: fir.result %[[VAL_25:.*]]#0, %[[VAL_3:.*]] : !fir.box>, i1 +// CHECK: } +// CHECK: %[[VAL_22:.*]] = fir.box_addr %[[VAL_21:.*]]#0 : (!fir.box>) -> !fir.ref> +// CHECK: %[[VAL_23:.*]]:3 = hlfir.associate %[[VAL_5:.*]] {adapt.valuebyref} : (i32) -> (!fir.ref, !fir.ref, i1) +// CHECK: fir.call @_QFPsb(%[[VAL_22:.*]], %[[VAL_23:.*]]#0) fastmath : (!fir.ref>, !fir.ref) -> () +// CHECK: fir.if %[[VAL_21:.*]]#1 { +// CHECK: %[[VAL_24:.*]] = fir.box_addr %[[VAL_21:.*]]#0 : (!fir.box>) -> !fir.ref> +// CHECK: %[[VAL_25:.*]] = fir.convert %[[VAL_24:.*]] : (!fir.ref>) -> !fir.heap> +// CHECK: fir.freemem %[[VAL_25:.*]] : !fir.heap> +// CHECK: } +// CHECK: hlfir.end_associate %[[VAL_23:.*]]#1, %[[VAL_23:.*]]#2 : !fir.ref, i1 +// CHECK: return +// CHECK: } + +// Test not inlining of hlfir.copy_in that requires the array to be copied out +func.func private @_test_no_inline_copy_in(%arg0: !fir.box> {fir.bindc_name = "x"}, %arg1: !fir.ref {fir.bindc_name = "i"}, %arg2: !fir.ref {fir.bindc_name = "j"}) { + %0 = fir.alloca !fir.box>> + %1 = fir.dummy_scope : !fir.dscope + %2:2 = hlfir.declare %arg1 dummy_scope %1 {uniq_name = "_QFFsb2Ei"} : (!fir.ref, !fir.dscope) -> (!fir.ref, !fir.ref) + %3:2 = hlfir.declare %arg2 
dummy_scope %1 {uniq_name = "_QFFsb2Ej"} : (!fir.ref, !fir.dscope) -> (!fir.ref, !fir.ref) + %4:2 = hlfir.declare %arg0 dummy_scope %1 {uniq_name = "_QFFsb2Ex"} : (!fir.box>, !fir.dscope) -> (!fir.box>, !fir.box>) + %5 = fir.load %2#0 : !fir.ref + %6 = fir.convert %5 : (i32) -> i64 + %c1 = arith.constant 1 : index + %c1_0 = arith.constant 1 : index + %7:3 = fir.box_dims %4#1, %c1_0 : (!fir.box>, index) -> (index, index, index) + %c1_1 = arith.constant 1 : index + %c0 = arith.constant 0 : index + %8 = arith.subi %7#1, %c1 : index + %9 = arith.addi %8, %c1_1 : index + %10 = arith.divsi %9, %c1_1 : index + %11 = arith.cmpi sgt, %10, %c0 : index + %12 = arith.select %11, %10, %c0 : index + %13 = fir.load %3#0 : !fir.ref + %14 = fir.convert %13 : (i32) -> i64 + %15 = fir.shape %12 : (index) -> !fir.shape<1> + %16 = hlfir.designate %4#0 (%6, %c1:%7#1:%c1_1, %14) shape %15 : (!fir.box>, i64, index, index, index, i64, !fir.shape<1>) -> !fir.box> + %c100_i32 = arith.constant 100 : i32 + %17:2 = hlfir.copy_in %16 to %0 : (!fir.box>, !fir.ref>>>) -> (!fir.box>, i1) + %18 = fir.box_addr %17#0 : (!fir.box>) -> !fir.ref> + %19:3 = hlfir.associate %c100_i32 {adapt.valuebyref} : (i32) -> (!fir.ref, !fir.ref, i1) + fir.call @_QFPsb(%18, %19#1) fastmath : (!fir.ref>, !fir.ref) -> () + hlfir.copy_out %0, %17#1 to %16 : (!fir.ref>>>, i1, !fir.box>) -> () + hlfir.end_associate %19#1, %19#2 : !fir.ref, i1 + return +} + +// CHECK-LABEL: func.func private @_test_no_inline_copy_in( +// CHECK-SAME: %[[VAL_0:.*]]: !fir.box> {fir.bindc_name = "x"}, +// CHECK-SAME: %[[VAL_1:.*]]: !fir.ref {fir.bindc_name = "i"}, +// CHECK-SAME: %[[VAL_2:.*]]: !fir.ref {fir.bindc_name = "j"}) { +// CHECK: %[[VAL_3:.*]] = arith.constant 100 : i32 +// CHECK: %[[VAL_4:.*]] = arith.constant 0 : index +// CHECK: %[[VAL_5:.*]] = arith.constant 1 : index +// CHECK: %[[VAL_6:.*]] = fir.alloca !fir.box>> +// CHECK: %[[VAL_7:.*]] = fir.dummy_scope : !fir.dscope +// CHECK: %[[VAL_8:.*]]:2 = hlfir.declare %[[VAL_1:.*]] 
dummy_scope %[[VAL_7:.*]] {uniq_name = "_QFFsb2Ei"} : (!fir.ref, !fir.dscope) -> (!fir.ref, !fir.ref) +// CHECK: %[[VAL_9:.*]]:2 = hlfir.declare %[[VAL_2:.*]] dummy_scope %[[VAL_7:.*]] {uniq_name = "_QFFsb2Ej"} : (!fir.ref, !fir.dscope) -> (!fir.ref, !fir.ref) +// CHECK: %[[VAL_10:.*]]:2 = hlfir.declare %[[VAL_0:.*]] dummy_scope %[[VAL_7:.*]] {uniq_name = "_QFFsb2Ex"} : (!fir.box>, !fir.dscope) -> (!fir.box>, !fir.box>) +// CHECK: %[[VAL_11:.*]] = fir.load %[[VAL_8:.*]]#0 : !fir.ref +// CHECK: %[[VAL_12:.*]] = fir.convert %[[VAL_11:.*]] : (i32) -> i64 +// CHECK: %[[VAL_13:.*]]:3 = fir.box_dims %[[VAL_10:.*]]#1, %[[VAL_5:.*]] : (!fir.box>, index) -> (index, index, index) +// CHECK: %[[VAL_14:.*]] = arith.cmpi sgt, %[[VAL_13:.*]]#1, %[[VAL_4:.*]] : index +// CHECK: %[[VAL_15:.*]] = arith.select %[[VAL_14:.*]], %[[VAL_13:.*]]#1, %[[VAL_4:.*]] : index +// CHECK: %[[VAL_16:.*]] = fir.load %[[VAL_9:.*]]#0 : !fir.ref +// CHECK: %[[VAL_17:.*]] = fir.convert %[[VAL_16:.*]] : (i32) -> i64 +// CHECK: %[[VAL_18:.*]] = fir.shape %[[VAL_15:.*]] : (index) -> !fir.shape<1> +// CHECK: %[[VAL_19:.*]] = hlfir.designate %[[VAL_10:.*]]#0 (%[[VAL_12:.*]], %[[VAL_5:.*]]:%[[VAL_13:.*]]#1:%[[VAL_5:.*]], %[[VAL_17:.*]]) shape %[[VAL_18:.*]] : (!fir.box>, i64, index, index, index, i64, !fir.shape<1>) -> !fir.box> +// CHECK: %[[VAL_20:.*]]:2 = hlfir.copy_in %[[VAL_19:.*]] to %[[VAL_6:.*]] : (!fir.box>, !fir.ref>>>) -> (!fir.box>, i1) +// CHECK: %[[VAL_21:.*]] = fir.box_addr %[[VAL_20:.*]]#0 : (!fir.box>) -> !fir.ref> +// CHECK: %[[VAL_22:.*]]:3 = hlfir.associate %[[VAL_3:.*]] {adapt.valuebyref} : (i32) -> (!fir.ref, !fir.ref, i1) +// CHECK: fir.call @_QFPsb(%[[VAL_21:.*]], %[[VAL_22:.*]]#1) fastmath : (!fir.ref>, !fir.ref) -> () +// CHECK: hlfir.copy_out %[[VAL_6:.*]], %[[VAL_20:.*]]#1 to %[[VAL_19:.*]] : (!fir.ref>>>, i1, !fir.box>) -> () +// CHECK: hlfir.end_associate %[[VAL_22:.*]]#1, %[[VAL_22:.*]]#2 : !fir.ref, i1 +// CHECK: return +// CHECK: } From 
6d334d77917a9e02b3e397dd1b3ea8605320c795 Mon Sep 17 00:00:00 2001 From: Kajetan Puchalski Date: Thu, 8 May 2025 15:15:56 +0000 Subject: [PATCH 3/7] Address Tom's review comments Signed-off-by: Kajetan Puchalski --- .../HLFIR/Transforms/InlineHLFIRAssign.cpp | 41 +++++++++++-------- 1 file changed, 23 insertions(+), 18 deletions(-) diff --git a/flang/lib/Optimizer/HLFIR/Transforms/InlineHLFIRAssign.cpp b/flang/lib/Optimizer/HLFIR/Transforms/InlineHLFIRAssign.cpp index 38c684eaceb7d..dc545ece8adff 100644 --- a/flang/lib/Optimizer/HLFIR/Transforms/InlineHLFIRAssign.cpp +++ b/flang/lib/Optimizer/HLFIR/Transforms/InlineHLFIRAssign.cpp @@ -158,16 +158,16 @@ InlineCopyInConversion::matchAndRewrite(hlfir::CopyInOp copyIn, "CopyInOp's WasCopied has no uses"); // The copy out should always be present, either to actually copy or just // deallocate memory. - auto *copyOut = - copyIn.getWasCopied().getUsers().begin().getCurrent().getUser(); + auto copyOut = mlir::dyn_cast( + copyIn.getWasCopied().getUsers().begin().getCurrent().getUser()); - if (!mlir::isa(copyOut)) + if (!copyOut) return rewriter.notifyMatchFailure(copyIn, "CopyInOp has no direct CopyOut"); // Only inline the copy_in when copy_out does not need to be done, i.e. in // case of intent(in). 
- if (::llvm::cast(copyOut).getVar()) + if (copyOut.getVar()) return rewriter.notifyMatchFailure(copyIn, "CopyIn needs a copy-out"); inputVariable = @@ -175,7 +175,7 @@ InlineCopyInConversion::matchAndRewrite(hlfir::CopyInOp copyIn, mlir::Type resultAddrType = copyIn.getCopiedIn().getType(); mlir::Value isContiguous = builder.create(loc, inputVariable); - auto results = + mlir::Operation::result_range results = builder .genIfOp(loc, {resultAddrType, builder.getI1Type()}, isContiguous, /*withElseRegion=*/true) @@ -195,11 +195,11 @@ InlineCopyInConversion::matchAndRewrite(hlfir::CopyInOp copyIn, loc, builder, extents, /*isUnordered=*/true, flangomp::shouldUseWorkshareLowering(copyIn)); builder.setInsertionPointToStart(loopNest.body); - auto elem = hlfir::getElementAt(loc, builder, inputVariable, - loopNest.oneBasedIndices); + hlfir::Entity elem = hlfir::getElementAt( + loc, builder, inputVariable, loopNest.oneBasedIndices); elem = hlfir::loadTrivialScalar(loc, builder, elem); - auto tempElem = hlfir::getElementAt(loc, builder, temp, - loopNest.oneBasedIndices); + hlfir::Entity tempElem = hlfir::getElementAt( + loc, builder, temp, loopNest.oneBasedIndices); builder.create(loc, elem, tempElem); builder.setInsertionPointAfter(loopNest.outerOp); @@ -209,9 +209,9 @@ InlineCopyInConversion::matchAndRewrite(hlfir::CopyInOp copyIn, if (mlir::isa(temp.getType())) { result = temp; } else { - auto refTy = + fir::ReferenceType refTy = fir::ReferenceType::get(temp.getElementOrSequenceType()); - auto refVal = builder.createConvert(loc, refTy, temp); + mlir::Value refVal = builder.createConvert(loc, refTy, temp); result = builder.create(loc, resultAddrType, refVal); } @@ -221,25 +221,30 @@ InlineCopyInConversion::matchAndRewrite(hlfir::CopyInOp copyIn, }) .getResults(); - auto addr = results[0]; - auto needsCleanup = results[1]; + mlir::OpResult addr = results[0]; + mlir::OpResult needsCleanup = results[1]; builder.setInsertionPoint(copyOut); - builder.genIfOp(loc, {}, 
needsCleanup, false).genThen([&] { + builder.genIfOp(loc, {}, needsCleanup, /*withElseRegion=*/false).genThen([&] { auto boxAddr = builder.create(loc, addr); - auto heapType = fir::HeapType::get(fir::BoxValue(addr).getBaseTy()); - auto heapVal = builder.createConvert(loc, heapType, boxAddr.getResult()); + fir::HeapType heapType = + fir::HeapType::get(fir::BoxValue(addr).getBaseTy()); + mlir::Value heapVal = + builder.createConvert(loc, heapType, boxAddr.getResult()); builder.create(loc, heapVal); }); rewriter.eraseOp(copyOut); - auto tempBox = copyIn.getTempBox(); + mlir::Value tempBox = copyIn.getTempBox(); rewriter.replaceOp(copyIn, {addr, builder.genNot(loc, isContiguous)}); // The TempBox is only needed for flang-rt calls which we're no longer - // generating. + // generating. It should have no uses left at this stage. + if (!tempBox.getUses().empty()) + return mlir::failure(); rewriter.eraseOp(tempBox.getDefiningOp()); + return mlir::success(); } From 6a9d0fd6cc3c72ed7382bd78128a4cd59b75abe9 Mon Sep 17 00:00:00 2001 From: Kajetan Puchalski Date: Thu, 22 May 2025 13:37:53 +0000 Subject: [PATCH 4/7] Separate copy_in inlining into its own pass, add flag Signed-off-by: Kajetan Puchalski --- flang/include/flang/Optimizer/HLFIR/Passes.td | 4 + .../Optimizer/HLFIR/Transforms/CMakeLists.txt | 1 + .../HLFIR/Transforms/InlineHLFIRAssign.cpp | 122 ------------ .../HLFIR/Transforms/InlineHLFIRCopyIn.cpp | 180 ++++++++++++++++++ flang/lib/Optimizer/Passes/Pipelines.cpp | 5 + flang/test/HLFIR/inline-hlfir-assign.fir | 144 -------------- flang/test/HLFIR/inline-hlfir-copy-in.fir | 146 ++++++++++++++ 7 files changed, 336 insertions(+), 266 deletions(-) create mode 100644 flang/lib/Optimizer/HLFIR/Transforms/InlineHLFIRCopyIn.cpp create mode 100644 flang/test/HLFIR/inline-hlfir-copy-in.fir diff --git a/flang/include/flang/Optimizer/HLFIR/Passes.td b/flang/include/flang/Optimizer/HLFIR/Passes.td index d445140118073..04d7aec5fe489 100644 --- 
a/flang/include/flang/Optimizer/HLFIR/Passes.td +++ b/flang/include/flang/Optimizer/HLFIR/Passes.td @@ -69,6 +69,10 @@ def InlineHLFIRAssign : Pass<"inline-hlfir-assign"> { let summary = "Inline hlfir.assign operations"; } +def InlineHLFIRCopyIn : Pass<"inline-hlfir-copy-in"> { + let summary = "Inline hlfir.copy_in operations"; +} + def PropagateFortranVariableAttributes : Pass<"propagate-fortran-attrs"> { let summary = "Propagate FortranVariableFlagsAttr attributes through HLFIR"; } diff --git a/flang/lib/Optimizer/HLFIR/Transforms/CMakeLists.txt b/flang/lib/Optimizer/HLFIR/Transforms/CMakeLists.txt index d959428ebd203..cc74273d9c5d9 100644 --- a/flang/lib/Optimizer/HLFIR/Transforms/CMakeLists.txt +++ b/flang/lib/Optimizer/HLFIR/Transforms/CMakeLists.txt @@ -5,6 +5,7 @@ add_flang_library(HLFIRTransforms ConvertToFIR.cpp InlineElementals.cpp InlineHLFIRAssign.cpp + InlineHLFIRCopyIn.cpp LowerHLFIRIntrinsics.cpp LowerHLFIROrderedAssignments.cpp ScheduleOrderedAssignments.cpp diff --git a/flang/lib/Optimizer/HLFIR/Transforms/InlineHLFIRAssign.cpp b/flang/lib/Optimizer/HLFIR/Transforms/InlineHLFIRAssign.cpp index dc545ece8adff..6e209cce07ad4 100644 --- a/flang/lib/Optimizer/HLFIR/Transforms/InlineHLFIRAssign.cpp +++ b/flang/lib/Optimizer/HLFIR/Transforms/InlineHLFIRAssign.cpp @@ -13,7 +13,6 @@ #include "flang/Optimizer/Analysis/AliasAnalysis.h" #include "flang/Optimizer/Builder/FIRBuilder.h" #include "flang/Optimizer/Builder/HLFIRTools.h" -#include "flang/Optimizer/Dialect/FIRType.h" #include "flang/Optimizer/HLFIR/HLFIROps.h" #include "flang/Optimizer/HLFIR/Passes.h" #include "flang/Optimizer/OpenMP/Passes.h" @@ -128,126 +127,6 @@ class InlineHLFIRAssignConversion } }; -class InlineCopyInConversion : public mlir::OpRewritePattern { -public: - using mlir::OpRewritePattern::OpRewritePattern; - - llvm::LogicalResult - matchAndRewrite(hlfir::CopyInOp copyIn, - mlir::PatternRewriter &rewriter) const override; -}; - -llvm::LogicalResult 
-InlineCopyInConversion::matchAndRewrite(hlfir::CopyInOp copyIn, - mlir::PatternRewriter &rewriter) const { - fir::FirOpBuilder builder(rewriter, copyIn.getOperation()); - mlir::Location loc = copyIn.getLoc(); - hlfir::Entity inputVariable{copyIn.getVar()}; - if (!fir::isa_trivial(inputVariable.getFortranElementType())) - return rewriter.notifyMatchFailure(copyIn, - "CopyInOp's data type is not trivial"); - - if (fir::isPointerType(inputVariable.getType())) - return rewriter.notifyMatchFailure( - copyIn, "CopyInOp's input variable is a pointer"); - - // There should be exactly one user of WasCopied - the corresponding - // CopyOutOp. - if (copyIn.getWasCopied().getUses().empty()) - return rewriter.notifyMatchFailure(copyIn, - "CopyInOp's WasCopied has no uses"); - // The copy out should always be present, either to actually copy or just - // deallocate memory. - auto copyOut = mlir::dyn_cast( - copyIn.getWasCopied().getUsers().begin().getCurrent().getUser()); - - if (!copyOut) - return rewriter.notifyMatchFailure(copyIn, - "CopyInOp has no direct CopyOut"); - - // Only inline the copy_in when copy_out does not need to be done, i.e. in - // case of intent(in). 
- if (copyOut.getVar()) - return rewriter.notifyMatchFailure(copyIn, "CopyIn needs a copy-out"); - - inputVariable = - hlfir::derefPointersAndAllocatables(loc, builder, inputVariable); - mlir::Type resultAddrType = copyIn.getCopiedIn().getType(); - mlir::Value isContiguous = - builder.create(loc, inputVariable); - mlir::Operation::result_range results = - builder - .genIfOp(loc, {resultAddrType, builder.getI1Type()}, isContiguous, - /*withElseRegion=*/true) - .genThen([&]() { - mlir::Value falseVal = builder.create( - loc, builder.getI1Type(), builder.getBoolAttr(false)); - builder.create( - loc, mlir::ValueRange{inputVariable, falseVal}); - }) - .genElse([&] { - auto [temp, cleanup] = - hlfir::createTempFromMold(loc, builder, inputVariable); - mlir::Value shape = hlfir::genShape(loc, builder, inputVariable); - llvm::SmallVector extents = - hlfir::getIndexExtents(loc, builder, shape); - hlfir::LoopNest loopNest = hlfir::genLoopNest( - loc, builder, extents, /*isUnordered=*/true, - flangomp::shouldUseWorkshareLowering(copyIn)); - builder.setInsertionPointToStart(loopNest.body); - hlfir::Entity elem = hlfir::getElementAt( - loc, builder, inputVariable, loopNest.oneBasedIndices); - elem = hlfir::loadTrivialScalar(loc, builder, elem); - hlfir::Entity tempElem = hlfir::getElementAt( - loc, builder, temp, loopNest.oneBasedIndices); - builder.create(loc, elem, tempElem); - builder.setInsertionPointAfter(loopNest.outerOp); - - mlir::Value result; - // Make sure the result is always a boxed array by boxing it - // ourselves if need be. 
- if (mlir::isa(temp.getType())) { - result = temp; - } else { - fir::ReferenceType refTy = - fir::ReferenceType::get(temp.getElementOrSequenceType()); - mlir::Value refVal = builder.createConvert(loc, refTy, temp); - result = - builder.create(loc, resultAddrType, refVal); - } - - builder.create(loc, - mlir::ValueRange{result, cleanup}); - }) - .getResults(); - - mlir::OpResult addr = results[0]; - mlir::OpResult needsCleanup = results[1]; - - builder.setInsertionPoint(copyOut); - builder.genIfOp(loc, {}, needsCleanup, /*withElseRegion=*/false).genThen([&] { - auto boxAddr = builder.create(loc, addr); - fir::HeapType heapType = - fir::HeapType::get(fir::BoxValue(addr).getBaseTy()); - mlir::Value heapVal = - builder.createConvert(loc, heapType, boxAddr.getResult()); - builder.create(loc, heapVal); - }); - rewriter.eraseOp(copyOut); - - mlir::Value tempBox = copyIn.getTempBox(); - - rewriter.replaceOp(copyIn, {addr, builder.genNot(loc, isContiguous)}); - - // The TempBox is only needed for flang-rt calls which we're no longer - // generating. It should have no uses left at this stage. 
- if (!tempBox.getUses().empty()) - return mlir::failure(); - rewriter.eraseOp(tempBox.getDefiningOp()); - - return mlir::success(); -} - class InlineHLFIRAssignPass : public hlfir::impl::InlineHLFIRAssignBase { public: @@ -261,7 +140,6 @@ class InlineHLFIRAssignPass mlir::RewritePatternSet patterns(context); patterns.insert(context); - patterns.insert(context); if (mlir::failed(mlir::applyPatternsGreedily( getOperation(), std::move(patterns), config))) { diff --git a/flang/lib/Optimizer/HLFIR/Transforms/InlineHLFIRCopyIn.cpp b/flang/lib/Optimizer/HLFIR/Transforms/InlineHLFIRCopyIn.cpp new file mode 100644 index 0000000000000..1e2aecaf535a0 --- /dev/null +++ b/flang/lib/Optimizer/HLFIR/Transforms/InlineHLFIRCopyIn.cpp @@ -0,0 +1,180 @@ +//===- InlineHLFIRCopyIn.cpp - Inline hlfir.copy_in ops -------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// Transform hlfir.copy_in array operations into loop nests performing element +// per element assignments. For simplicity, the inlining is done for trivial +// data types when the copy_in does not require a corresponding copy_out and +// when the input array is not behind a pointer. This may change in the future. 
+//===----------------------------------------------------------------------===// + +#include "flang/Optimizer/Builder/FIRBuilder.h" +#include "flang/Optimizer/Builder/HLFIRTools.h" +#include "flang/Optimizer/Dialect/FIRType.h" +#include "flang/Optimizer/HLFIR/HLFIROps.h" +#include "flang/Optimizer/OpenMP/Passes.h" +#include "mlir/IR/PatternMatch.h" +#include "mlir/Support/LLVM.h" +#include "mlir/Transforms/GreedyPatternRewriteDriver.h" + +namespace hlfir { +#define GEN_PASS_DEF_INLINEHLFIRCOPYIN +#include "flang/Optimizer/HLFIR/Passes.h.inc" +} // namespace hlfir + +#define DEBUG_TYPE "inline-hlfir-copy-in" + +static llvm::cl::opt noInlineHLFIRCopyIn( + "no-inline-hlfir-copy-in", + llvm::cl::desc("Do not inline hlfir.copy_in operations"), + llvm::cl::init(false)); + +namespace { +class InlineCopyInConversion : public mlir::OpRewritePattern { +public: + using mlir::OpRewritePattern::OpRewritePattern; + + llvm::LogicalResult + matchAndRewrite(hlfir::CopyInOp copyIn, + mlir::PatternRewriter &rewriter) const override; +}; + +llvm::LogicalResult +InlineCopyInConversion::matchAndRewrite(hlfir::CopyInOp copyIn, + mlir::PatternRewriter &rewriter) const { + fir::FirOpBuilder builder(rewriter, copyIn.getOperation()); + mlir::Location loc = copyIn.getLoc(); + hlfir::Entity inputVariable{copyIn.getVar()}; + if (!fir::isa_trivial(inputVariable.getFortranElementType())) + return rewriter.notifyMatchFailure(copyIn, + "CopyInOp's data type is not trivial"); + + if (fir::isPointerType(inputVariable.getType())) + return rewriter.notifyMatchFailure( + copyIn, "CopyInOp's input variable is a pointer"); + + // There should be exactly one user of WasCopied - the corresponding + // CopyOutOp. + if (copyIn.getWasCopied().getUses().empty()) + return rewriter.notifyMatchFailure(copyIn, + "CopyInOp's WasCopied has no uses"); + // The copy out should always be present, either to actually copy or just + // deallocate memory. 
+ auto copyOut = mlir::dyn_cast( + copyIn.getWasCopied().getUsers().begin().getCurrent().getUser()); + + if (!copyOut) + return rewriter.notifyMatchFailure(copyIn, + "CopyInOp has no direct CopyOut"); + + // Only inline the copy_in when copy_out does not need to be done, i.e. in + // case of intent(in). + if (copyOut.getVar()) + return rewriter.notifyMatchFailure(copyIn, "CopyIn needs a copy-out"); + + inputVariable = + hlfir::derefPointersAndAllocatables(loc, builder, inputVariable); + mlir::Type resultAddrType = copyIn.getCopiedIn().getType(); + mlir::Value isContiguous = + builder.create(loc, inputVariable); + mlir::Operation::result_range results = + builder + .genIfOp(loc, {resultAddrType, builder.getI1Type()}, isContiguous, + /*withElseRegion=*/true) + .genThen([&]() { + mlir::Value falseVal = builder.create( + loc, builder.getI1Type(), builder.getBoolAttr(false)); + builder.create( + loc, mlir::ValueRange{inputVariable, falseVal}); + }) + .genElse([&] { + auto [temp, cleanup] = + hlfir::createTempFromMold(loc, builder, inputVariable); + mlir::Value shape = hlfir::genShape(loc, builder, inputVariable); + llvm::SmallVector extents = + hlfir::getIndexExtents(loc, builder, shape); + hlfir::LoopNest loopNest = hlfir::genLoopNest( + loc, builder, extents, /*isUnordered=*/true, + flangomp::shouldUseWorkshareLowering(copyIn)); + builder.setInsertionPointToStart(loopNest.body); + hlfir::Entity elem = hlfir::getElementAt( + loc, builder, inputVariable, loopNest.oneBasedIndices); + elem = hlfir::loadTrivialScalar(loc, builder, elem); + hlfir::Entity tempElem = hlfir::getElementAt( + loc, builder, temp, loopNest.oneBasedIndices); + builder.create(loc, elem, tempElem); + builder.setInsertionPointAfter(loopNest.outerOp); + + mlir::Value result; + // Make sure the result is always a boxed array by boxing it + // ourselves if need be. 
+ if (mlir::isa(temp.getType())) { + result = temp; + } else { + fir::ReferenceType refTy = + fir::ReferenceType::get(temp.getElementOrSequenceType()); + mlir::Value refVal = builder.createConvert(loc, refTy, temp); + result = + builder.create(loc, resultAddrType, refVal); + } + + builder.create(loc, + mlir::ValueRange{result, cleanup}); + }) + .getResults(); + + mlir::OpResult addr = results[0]; + mlir::OpResult needsCleanup = results[1]; + + builder.setInsertionPoint(copyOut); + builder.genIfOp(loc, {}, needsCleanup, /*withElseRegion=*/false).genThen([&] { + auto boxAddr = builder.create(loc, addr); + fir::HeapType heapType = + fir::HeapType::get(fir::BoxValue(addr).getBaseTy()); + mlir::Value heapVal = + builder.createConvert(loc, heapType, boxAddr.getResult()); + builder.create(loc, heapVal); + }); + rewriter.eraseOp(copyOut); + + mlir::Value tempBox = copyIn.getTempBox(); + + rewriter.replaceOp(copyIn, {addr, builder.genNot(loc, isContiguous)}); + + // The TempBox is only needed for flang-rt calls which we're no longer + // generating. It should have no uses left at this stage. + if (!tempBox.getUses().empty()) + return mlir::failure(); + rewriter.eraseOp(tempBox.getDefiningOp()); + + return mlir::success(); +} + +class InlineHLFIRCopyInPass + : public hlfir::impl::InlineHLFIRCopyInBase { +public: + void runOnOperation() override { + mlir::MLIRContext *context = &getContext(); + + mlir::GreedyRewriteConfig config; + // Prevent the pattern driver from merging blocks. 
+ config.setRegionSimplificationLevel( + mlir::GreedySimplifyRegionLevel::Disabled); + + mlir::RewritePatternSet patterns(context); + if (!noInlineHLFIRCopyIn) { + patterns.insert(context); + } + + if (mlir::failed(mlir::applyPatternsGreedily( + getOperation(), std::move(patterns), config))) { + mlir::emitError(getOperation()->getLoc(), + "failure in hlfir.copy_in inlining"); + signalPassFailure(); + } + } +}; +} // namespace diff --git a/flang/lib/Optimizer/Passes/Pipelines.cpp b/flang/lib/Optimizer/Passes/Pipelines.cpp index 77751908e35be..1779623fddc5a 100644 --- a/flang/lib/Optimizer/Passes/Pipelines.cpp +++ b/flang/lib/Optimizer/Passes/Pipelines.cpp @@ -255,6 +255,11 @@ void createHLFIRToFIRPassPipeline(mlir::PassManager &pm, bool enableOpenMP, pm, hlfir::createOptimizedBufferization); addNestedPassToAllTopLevelOperations( pm, hlfir::createInlineHLFIRAssign); + + if (optLevel == llvm::OptimizationLevel::O3) { + addNestedPassToAllTopLevelOperations( + pm, hlfir::createInlineHLFIRCopyIn); + } } pm.addPass(hlfir::createLowerHLFIROrderedAssignments()); pm.addPass(hlfir::createLowerHLFIRIntrinsics()); diff --git a/flang/test/HLFIR/inline-hlfir-assign.fir b/flang/test/HLFIR/inline-hlfir-assign.fir index df7681b9c5c16..f834e7971e3d5 100644 --- a/flang/test/HLFIR/inline-hlfir-assign.fir +++ b/flang/test/HLFIR/inline-hlfir-assign.fir @@ -353,147 +353,3 @@ func.func @_QPtest_expr_rhs(%arg0: !fir.ref> // CHECK: return // CHECK: } - -// Test inlining of hlfir.copy_in that does not require the array to be copied out -func.func private @_test_inline_copy_in(%arg0: !fir.box> {fir.bindc_name = "x"}, %arg1: !fir.ref {fir.bindc_name = "i"}, %arg2: !fir.ref {fir.bindc_name = "j"}) { - %0 = fir.alloca !fir.box>> - %1 = fir.dummy_scope : !fir.dscope - %2:2 = hlfir.declare %arg1 dummy_scope %1 {uniq_name = "_QFFsb2Ei"} : (!fir.ref, !fir.dscope) -> (!fir.ref, !fir.ref) - %3:2 = hlfir.declare %arg2 dummy_scope %1 {uniq_name = "_QFFsb2Ej"} : (!fir.ref, !fir.dscope) -> (!fir.ref, 
!fir.ref) - %4:2 = hlfir.declare %arg0 dummy_scope %1 {uniq_name = "_QFFsb2Ex"} : (!fir.box>, !fir.dscope) -> (!fir.box>, !fir.box>) - %5 = fir.load %2#0 : !fir.ref - %6 = fir.convert %5 : (i32) -> i64 - %c1 = arith.constant 1 : index - %c1_0 = arith.constant 1 : index - %7:3 = fir.box_dims %4#1, %c1_0 : (!fir.box>, index) -> (index, index, index) - %c1_1 = arith.constant 1 : index - %c0 = arith.constant 0 : index - %8 = arith.subi %7#1, %c1 : index - %9 = arith.addi %8, %c1_1 : index - %10 = arith.divsi %9, %c1_1 : index - %11 = arith.cmpi sgt, %10, %c0 : index - %12 = arith.select %11, %10, %c0 : index - %13 = fir.load %3#0 : !fir.ref - %14 = fir.convert %13 : (i32) -> i64 - %15 = fir.shape %12 : (index) -> !fir.shape<1> - %16 = hlfir.designate %4#0 (%6, %c1:%7#1:%c1_1, %14) shape %15 : (!fir.box>, i64, index, index, index, i64, !fir.shape<1>) -> !fir.box> - %c100_i32 = arith.constant 100 : i32 - %17:2 = hlfir.copy_in %16 to %0 : (!fir.box>, !fir.ref>>>) -> (!fir.box>, i1) - %18 = fir.box_addr %17#0 : (!fir.box>) -> !fir.ref> - %19:3 = hlfir.associate %c100_i32 {adapt.valuebyref} : (i32) -> (!fir.ref, !fir.ref, i1) - fir.call @_QFPsb(%18, %19#0) fastmath : (!fir.ref>, !fir.ref) -> () - hlfir.copy_out %0, %17#1 : (!fir.ref>>>, i1) -> () - hlfir.end_associate %19#1, %19#2 : !fir.ref, i1 - return -} - -// CHECK-LABEL: func.func private @_test_inline_copy_in( -// CHECK-SAME: %[[VAL_0:.*]]: !fir.box> {fir.bindc_name = "x"}, -// CHECK-SAME: %[[VAL_1:.*]]: !fir.ref {fir.bindc_name = "i"}, -// CHECK-SAME: %[[VAL_2:.*]]: !fir.ref {fir.bindc_name = "j"}) { -// CHECK: %[[VAL_3:.*]] = arith.constant true -// CHECK: %[[VAL_4:.*]] = arith.constant false -// CHECK: %[[VAL_5:.*]] = arith.constant 100 : i32 -// CHECK: %[[VAL_6:.*]] = arith.constant 0 : index -// CHECK: %[[VAL_7:.*]] = arith.constant 1 : index -// CHECK: %[[VAL_8:.*]] = fir.dummy_scope : !fir.dscope -// CHECK: %[[VAL_22:.*]]:2 = hlfir.declare %[[VAL_1:.*]] dummy_scope %[[VAL_8:.*]] {uniq_name = "_QFFsb2Ei"} : 
(!fir.ref, !fir.dscope) -> (!fir.ref, !fir.ref) -// CHECK: %[[VAL_9:.*]]:2 = hlfir.declare %[[VAL_2:.*]] dummy_scope %[[VAL_8:.*]] {uniq_name = "_QFFsb2Ej"} : (!fir.ref, !fir.dscope) -> (!fir.ref, !fir.ref) -// CHECK: %[[VAL_10:.*]]:2 = hlfir.declare %[[VAL_0:.*]] dummy_scope %[[VAL_8:.*]] {uniq_name = "_QFFsb2Ex"} : (!fir.box>, !fir.dscope) -> (!fir.box>, !fir.box>) -// CHECK: %[[VAL_11:.*]] = fir.load %[[VAL_22:.*]]#0 : !fir.ref -// CHECK: %[[VAL_12:.*]] = fir.convert %[[VAL_11:.*]] : (i32) -> i64 -// CHECK: %[[VAL_13:.*]]:3 = fir.box_dims %[[VAL_10:.*]]#1, %[[VAL_7:.*]] : (!fir.box>, index) -> (index, index, index) -// CHECK: %[[VAL_14:.*]] = arith.cmpi sgt, %[[VAL_13:.*]]#1, %[[VAL_6:.*]] : index -// CHECK: %[[VAL_15:.*]] = arith.select %[[VAL_14:.*]], %[[VAL_13:.*]]#1, %[[VAL_6:.*]] : index -// CHECK: %[[VAL_16:.*]] = fir.load %[[VAL_9:.*]]#0 : !fir.ref -// CHECK: %[[VAL_17:.*]] = fir.convert %[[VAL_16:.*]] : (i32) -> i64 -// CHECK: %[[VAL_18:.*]] = fir.shape %[[VAL_15:.*]] : (index) -> !fir.shape<1> -// CHECK: %[[VAL_19:.*]] = hlfir.designate %[[VAL_10:.*]]#0 (%[[VAL_12:.*]], %[[VAL_7:.*]]:%[[VAL_13:.*]]#1:%[[VAL_7:.*]], %[[VAL_17:.*]]) shape %[[VAL_18:.*]] : (!fir.box>, i64, index, index, index, i64, !fir.shape<1>) -> !fir.box> -// CHECK: %[[VAL_20:.*]] = fir.is_contiguous_box %[[VAL_19:.*]] whole : (!fir.box>) -> i1 -// CHECK: %[[VAL_21:.*]]:2 = fir.if %[[VAL_20:.*]] -> (!fir.box>, i1) { -// CHECK: fir.result %[[VAL_19:.*]], %[[VAL_4:.*]] : !fir.box>, i1 -// CHECK: } else { -// CHECK: %[[VAL_24:.*]] = fir.allocmem !fir.array, %[[VAL_15:.*]] {bindc_name = ".tmp", uniq_name = ""} -// CHECK: %[[VAL_25:.*]]:2 = hlfir.declare %[[VAL_24:.*]](%[[VAL_18:.*]]) {uniq_name = ".tmp"} : (!fir.heap>, !fir.shape<1>) -> (!fir.box>, !fir.heap>) -// CHECK: fir.do_loop %arg3 = %[[VAL_7:.*]] to %[[VAL_15:.*]] step %[[VAL_7:.*]] unordered { -// CHECK: %[[VAL_26:.*]] = hlfir.designate %[[VAL_19:.*]] (%arg3) : (!fir.box>, index) -> !fir.ref -// CHECK: %[[VAL_27:.*]] = fir.load 
%[[VAL_26:.*]] : !fir.ref -// CHECK: %[[VAL_28:.*]] = hlfir.designate %[[VAL_25:.*]]#0 (%arg3) : (!fir.box>, index) -> !fir.ref -// CHECK: hlfir.assign %[[VAL_27:.*]] to %[[VAL_28:.*]] : f64, !fir.ref -// CHECK: } -// CHECK: fir.result %[[VAL_25:.*]]#0, %[[VAL_3:.*]] : !fir.box>, i1 -// CHECK: } -// CHECK: %[[VAL_22:.*]] = fir.box_addr %[[VAL_21:.*]]#0 : (!fir.box>) -> !fir.ref> -// CHECK: %[[VAL_23:.*]]:3 = hlfir.associate %[[VAL_5:.*]] {adapt.valuebyref} : (i32) -> (!fir.ref, !fir.ref, i1) -// CHECK: fir.call @_QFPsb(%[[VAL_22:.*]], %[[VAL_23:.*]]#0) fastmath : (!fir.ref>, !fir.ref) -> () -// CHECK: fir.if %[[VAL_21:.*]]#1 { -// CHECK: %[[VAL_24:.*]] = fir.box_addr %[[VAL_21:.*]]#0 : (!fir.box>) -> !fir.ref> -// CHECK: %[[VAL_25:.*]] = fir.convert %[[VAL_24:.*]] : (!fir.ref>) -> !fir.heap> -// CHECK: fir.freemem %[[VAL_25:.*]] : !fir.heap> -// CHECK: } -// CHECK: hlfir.end_associate %[[VAL_23:.*]]#1, %[[VAL_23:.*]]#2 : !fir.ref, i1 -// CHECK: return -// CHECK: } - -// Test not inlining of hlfir.copy_in that requires the array to be copied out -func.func private @_test_no_inline_copy_in(%arg0: !fir.box> {fir.bindc_name = "x"}, %arg1: !fir.ref {fir.bindc_name = "i"}, %arg2: !fir.ref {fir.bindc_name = "j"}) { - %0 = fir.alloca !fir.box>> - %1 = fir.dummy_scope : !fir.dscope - %2:2 = hlfir.declare %arg1 dummy_scope %1 {uniq_name = "_QFFsb2Ei"} : (!fir.ref, !fir.dscope) -> (!fir.ref, !fir.ref) - %3:2 = hlfir.declare %arg2 dummy_scope %1 {uniq_name = "_QFFsb2Ej"} : (!fir.ref, !fir.dscope) -> (!fir.ref, !fir.ref) - %4:2 = hlfir.declare %arg0 dummy_scope %1 {uniq_name = "_QFFsb2Ex"} : (!fir.box>, !fir.dscope) -> (!fir.box>, !fir.box>) - %5 = fir.load %2#0 : !fir.ref - %6 = fir.convert %5 : (i32) -> i64 - %c1 = arith.constant 1 : index - %c1_0 = arith.constant 1 : index - %7:3 = fir.box_dims %4#1, %c1_0 : (!fir.box>, index) -> (index, index, index) - %c1_1 = arith.constant 1 : index - %c0 = arith.constant 0 : index - %8 = arith.subi %7#1, %c1 : index - %9 = arith.addi %8, 
%c1_1 : index - %10 = arith.divsi %9, %c1_1 : index - %11 = arith.cmpi sgt, %10, %c0 : index - %12 = arith.select %11, %10, %c0 : index - %13 = fir.load %3#0 : !fir.ref - %14 = fir.convert %13 : (i32) -> i64 - %15 = fir.shape %12 : (index) -> !fir.shape<1> - %16 = hlfir.designate %4#0 (%6, %c1:%7#1:%c1_1, %14) shape %15 : (!fir.box>, i64, index, index, index, i64, !fir.shape<1>) -> !fir.box> - %c100_i32 = arith.constant 100 : i32 - %17:2 = hlfir.copy_in %16 to %0 : (!fir.box>, !fir.ref>>>) -> (!fir.box>, i1) - %18 = fir.box_addr %17#0 : (!fir.box>) -> !fir.ref> - %19:3 = hlfir.associate %c100_i32 {adapt.valuebyref} : (i32) -> (!fir.ref, !fir.ref, i1) - fir.call @_QFPsb(%18, %19#1) fastmath : (!fir.ref>, !fir.ref) -> () - hlfir.copy_out %0, %17#1 to %16 : (!fir.ref>>>, i1, !fir.box>) -> () - hlfir.end_associate %19#1, %19#2 : !fir.ref, i1 - return -} - -// CHECK-LABEL: func.func private @_test_no_inline_copy_in( -// CHECK-SAME: %[[VAL_0:.*]]: !fir.box> {fir.bindc_name = "x"}, -// CHECK-SAME: %[[VAL_1:.*]]: !fir.ref {fir.bindc_name = "i"}, -// CHECK-SAME: %[[VAL_2:.*]]: !fir.ref {fir.bindc_name = "j"}) { -// CHECK: %[[VAL_3:.*]] = arith.constant 100 : i32 -// CHECK: %[[VAL_4:.*]] = arith.constant 0 : index -// CHECK: %[[VAL_5:.*]] = arith.constant 1 : index -// CHECK: %[[VAL_6:.*]] = fir.alloca !fir.box>> -// CHECK: %[[VAL_7:.*]] = fir.dummy_scope : !fir.dscope -// CHECK: %[[VAL_8:.*]]:2 = hlfir.declare %[[VAL_1:.*]] dummy_scope %[[VAL_7:.*]] {uniq_name = "_QFFsb2Ei"} : (!fir.ref, !fir.dscope) -> (!fir.ref, !fir.ref) -// CHECK: %[[VAL_9:.*]]:2 = hlfir.declare %[[VAL_2:.*]] dummy_scope %[[VAL_7:.*]] {uniq_name = "_QFFsb2Ej"} : (!fir.ref, !fir.dscope) -> (!fir.ref, !fir.ref) -// CHECK: %[[VAL_10:.*]]:2 = hlfir.declare %[[VAL_0:.*]] dummy_scope %[[VAL_7:.*]] {uniq_name = "_QFFsb2Ex"} : (!fir.box>, !fir.dscope) -> (!fir.box>, !fir.box>) -// CHECK: %[[VAL_11:.*]] = fir.load %[[VAL_8:.*]]#0 : !fir.ref -// CHECK: %[[VAL_12:.*]] = fir.convert %[[VAL_11:.*]] : (i32) -> i64 
-// CHECK: %[[VAL_13:.*]]:3 = fir.box_dims %[[VAL_10:.*]]#1, %[[VAL_5:.*]] : (!fir.box>, index) -> (index, index, index) -// CHECK: %[[VAL_14:.*]] = arith.cmpi sgt, %[[VAL_13:.*]]#1, %[[VAL_4:.*]] : index -// CHECK: %[[VAL_15:.*]] = arith.select %[[VAL_14:.*]], %[[VAL_13:.*]]#1, %[[VAL_4:.*]] : index -// CHECK: %[[VAL_16:.*]] = fir.load %[[VAL_9:.*]]#0 : !fir.ref -// CHECK: %[[VAL_17:.*]] = fir.convert %[[VAL_16:.*]] : (i32) -> i64 -// CHECK: %[[VAL_18:.*]] = fir.shape %[[VAL_15:.*]] : (index) -> !fir.shape<1> -// CHECK: %[[VAL_19:.*]] = hlfir.designate %[[VAL_10:.*]]#0 (%[[VAL_12:.*]], %[[VAL_5:.*]]:%[[VAL_13:.*]]#1:%[[VAL_5:.*]], %[[VAL_17:.*]]) shape %[[VAL_18:.*]] : (!fir.box>, i64, index, index, index, i64, !fir.shape<1>) -> !fir.box> -// CHECK: %[[VAL_20:.*]]:2 = hlfir.copy_in %[[VAL_19:.*]] to %[[VAL_6:.*]] : (!fir.box>, !fir.ref>>>) -> (!fir.box>, i1) -// CHECK: %[[VAL_21:.*]] = fir.box_addr %[[VAL_20:.*]]#0 : (!fir.box>) -> !fir.ref> -// CHECK: %[[VAL_22:.*]]:3 = hlfir.associate %[[VAL_3:.*]] {adapt.valuebyref} : (i32) -> (!fir.ref, !fir.ref, i1) -// CHECK: fir.call @_QFPsb(%[[VAL_21:.*]], %[[VAL_22:.*]]#1) fastmath : (!fir.ref>, !fir.ref) -> () -// CHECK: hlfir.copy_out %[[VAL_6:.*]], %[[VAL_20:.*]]#1 to %[[VAL_19:.*]] : (!fir.ref>>>, i1, !fir.box>) -> () -// CHECK: hlfir.end_associate %[[VAL_22:.*]]#1, %[[VAL_22:.*]]#2 : !fir.ref, i1 -// CHECK: return -// CHECK: } diff --git a/flang/test/HLFIR/inline-hlfir-copy-in.fir b/flang/test/HLFIR/inline-hlfir-copy-in.fir new file mode 100644 index 0000000000000..7140e93f19979 --- /dev/null +++ b/flang/test/HLFIR/inline-hlfir-copy-in.fir @@ -0,0 +1,146 @@ +// Test inlining of hlfir.copy_in +// RUN: fir-opt --inline-hlfir-copy-in %s | FileCheck %s + +// Test inlining of hlfir.copy_in that does not require the array to be copied out +func.func private @_test_inline_copy_in(%arg0: !fir.box> {fir.bindc_name = "x"}, %arg1: !fir.ref {fir.bindc_name = "i"}, %arg2: !fir.ref {fir.bindc_name = "j"}) { + %0 = fir.alloca 
!fir.box>> + %1 = fir.dummy_scope : !fir.dscope + %2:2 = hlfir.declare %arg1 dummy_scope %1 {uniq_name = "_QFFsb2Ei"} : (!fir.ref, !fir.dscope) -> (!fir.ref, !fir.ref) + %3:2 = hlfir.declare %arg2 dummy_scope %1 {uniq_name = "_QFFsb2Ej"} : (!fir.ref, !fir.dscope) -> (!fir.ref, !fir.ref) + %4:2 = hlfir.declare %arg0 dummy_scope %1 {uniq_name = "_QFFsb2Ex"} : (!fir.box>, !fir.dscope) -> (!fir.box>, !fir.box>) + %5 = fir.load %2#0 : !fir.ref + %6 = fir.convert %5 : (i32) -> i64 + %c1 = arith.constant 1 : index + %c1_0 = arith.constant 1 : index + %7:3 = fir.box_dims %4#1, %c1_0 : (!fir.box>, index) -> (index, index, index) + %c1_1 = arith.constant 1 : index + %c0 = arith.constant 0 : index + %8 = arith.subi %7#1, %c1 : index + %9 = arith.addi %8, %c1_1 : index + %10 = arith.divsi %9, %c1_1 : index + %11 = arith.cmpi sgt, %10, %c0 : index + %12 = arith.select %11, %10, %c0 : index + %13 = fir.load %3#0 : !fir.ref + %14 = fir.convert %13 : (i32) -> i64 + %15 = fir.shape %12 : (index) -> !fir.shape<1> + %16 = hlfir.designate %4#0 (%6, %c1:%7#1:%c1_1, %14) shape %15 : (!fir.box>, i64, index, index, index, i64, !fir.shape<1>) -> !fir.box> + %c100_i32 = arith.constant 100 : i32 + %17:2 = hlfir.copy_in %16 to %0 : (!fir.box>, !fir.ref>>>) -> (!fir.box>, i1) + %18 = fir.box_addr %17#0 : (!fir.box>) -> !fir.ref> + %19:3 = hlfir.associate %c100_i32 {adapt.valuebyref} : (i32) -> (!fir.ref, !fir.ref, i1) + fir.call @_QFPsb(%18, %19#0) fastmath : (!fir.ref>, !fir.ref) -> () + hlfir.copy_out %0, %17#1 : (!fir.ref>>>, i1) -> () + hlfir.end_associate %19#1, %19#2 : !fir.ref, i1 + return +} + +// CHECK-LABEL: func.func private @_test_inline_copy_in( +// CHECK-SAME: %[[VAL_0:.*]]: !fir.box> {fir.bindc_name = "x"}, +// CHECK-SAME: %[[VAL_1:.*]]: !fir.ref {fir.bindc_name = "i"}, +// CHECK-SAME: %[[VAL_2:.*]]: !fir.ref {fir.bindc_name = "j"}) { +// CHECK: %[[VAL_3:.*]] = arith.constant true +// CHECK: %[[VAL_4:.*]] = arith.constant false +// CHECK: %[[VAL_5:.*]] = arith.constant 100 : i32 
+// CHECK: %[[VAL_6:.*]] = arith.constant 0 : index +// CHECK: %[[VAL_7:.*]] = arith.constant 1 : index +// CHECK: %[[VAL_8:.*]] = fir.dummy_scope : !fir.dscope +// CHECK: %[[VAL_22:.*]]:2 = hlfir.declare %[[VAL_1:.*]] dummy_scope %[[VAL_8:.*]] {uniq_name = "_QFFsb2Ei"} : (!fir.ref, !fir.dscope) -> (!fir.ref, !fir.ref) +// CHECK: %[[VAL_9:.*]]:2 = hlfir.declare %[[VAL_2:.*]] dummy_scope %[[VAL_8:.*]] {uniq_name = "_QFFsb2Ej"} : (!fir.ref, !fir.dscope) -> (!fir.ref, !fir.ref) +// CHECK: %[[VAL_10:.*]]:2 = hlfir.declare %[[VAL_0:.*]] dummy_scope %[[VAL_8:.*]] {uniq_name = "_QFFsb2Ex"} : (!fir.box>, !fir.dscope) -> (!fir.box>, !fir.box>) +// CHECK: %[[VAL_11:.*]] = fir.load %[[VAL_22:.*]]#0 : !fir.ref +// CHECK: %[[VAL_12:.*]] = fir.convert %[[VAL_11:.*]] : (i32) -> i64 +// CHECK: %[[VAL_13:.*]]:3 = fir.box_dims %[[VAL_10:.*]]#1, %[[VAL_7:.*]] : (!fir.box>, index) -> (index, index, index) +// CHECK: %[[VAL_14:.*]] = arith.cmpi sgt, %[[VAL_13:.*]]#1, %[[VAL_6:.*]] : index +// CHECK: %[[VAL_15:.*]] = arith.select %[[VAL_14:.*]], %[[VAL_13:.*]]#1, %[[VAL_6:.*]] : index +// CHECK: %[[VAL_16:.*]] = fir.load %[[VAL_9:.*]]#0 : !fir.ref +// CHECK: %[[VAL_17:.*]] = fir.convert %[[VAL_16:.*]] : (i32) -> i64 +// CHECK: %[[VAL_18:.*]] = fir.shape %[[VAL_15:.*]] : (index) -> !fir.shape<1> +// CHECK: %[[VAL_19:.*]] = hlfir.designate %[[VAL_10:.*]]#0 (%[[VAL_12:.*]], %[[VAL_7:.*]]:%[[VAL_13:.*]]#1:%[[VAL_7:.*]], %[[VAL_17:.*]]) shape %[[VAL_18:.*]] : (!fir.box>, i64, index, index, index, i64, !fir.shape<1>) -> !fir.box> +// CHECK: %[[VAL_20:.*]] = fir.is_contiguous_box %[[VAL_19:.*]] whole : (!fir.box>) -> i1 +// CHECK: %[[VAL_21:.*]]:2 = fir.if %[[VAL_20:.*]] -> (!fir.box>, i1) { +// CHECK: fir.result %[[VAL_19:.*]], %[[VAL_4:.*]] : !fir.box>, i1 +// CHECK: } else { +// CHECK: %[[VAL_24:.*]] = fir.allocmem !fir.array, %[[VAL_15:.*]] {bindc_name = ".tmp", uniq_name = ""} +// CHECK: %[[VAL_25:.*]]:2 = hlfir.declare %[[VAL_24:.*]](%[[VAL_18:.*]]) {uniq_name = ".tmp"} : (!fir.heap>, 
!fir.shape<1>) -> (!fir.box>, !fir.heap>) +// CHECK: fir.do_loop %arg3 = %[[VAL_7:.*]] to %[[VAL_15:.*]] step %[[VAL_7:.*]] unordered { +// CHECK: %[[VAL_26:.*]] = hlfir.designate %[[VAL_19:.*]] (%arg3) : (!fir.box>, index) -> !fir.ref +// CHECK: %[[VAL_27:.*]] = fir.load %[[VAL_26:.*]] : !fir.ref +// CHECK: %[[VAL_28:.*]] = hlfir.designate %[[VAL_25:.*]]#0 (%arg3) : (!fir.box>, index) -> !fir.ref +// CHECK: hlfir.assign %[[VAL_27:.*]] to %[[VAL_28:.*]] : f64, !fir.ref +// CHECK: } +// CHECK: fir.result %[[VAL_25:.*]]#0, %[[VAL_3:.*]] : !fir.box>, i1 +// CHECK: } +// CHECK: %[[VAL_22:.*]] = fir.box_addr %[[VAL_21:.*]]#0 : (!fir.box>) -> !fir.ref> +// CHECK: %[[VAL_23:.*]]:3 = hlfir.associate %[[VAL_5:.*]] {adapt.valuebyref} : (i32) -> (!fir.ref, !fir.ref, i1) +// CHECK: fir.call @_QFPsb(%[[VAL_22:.*]], %[[VAL_23:.*]]#0) fastmath : (!fir.ref>, !fir.ref) -> () +// CHECK: fir.if %[[VAL_21:.*]]#1 { +// CHECK: %[[VAL_24:.*]] = fir.box_addr %[[VAL_21:.*]]#0 : (!fir.box>) -> !fir.ref> +// CHECK: %[[VAL_25:.*]] = fir.convert %[[VAL_24:.*]] : (!fir.ref>) -> !fir.heap> +// CHECK: fir.freemem %[[VAL_25:.*]] : !fir.heap> +// CHECK: } +// CHECK: hlfir.end_associate %[[VAL_23:.*]]#1, %[[VAL_23:.*]]#2 : !fir.ref, i1 +// CHECK: return +// CHECK: } + +// Test not inlining of hlfir.copy_in that requires the array to be copied out +func.func private @_test_no_inline_copy_in(%arg0: !fir.box> {fir.bindc_name = "x"}, %arg1: !fir.ref {fir.bindc_name = "i"}, %arg2: !fir.ref {fir.bindc_name = "j"}) { + %0 = fir.alloca !fir.box>> + %1 = fir.dummy_scope : !fir.dscope + %2:2 = hlfir.declare %arg1 dummy_scope %1 {uniq_name = "_QFFsb2Ei"} : (!fir.ref, !fir.dscope) -> (!fir.ref, !fir.ref) + %3:2 = hlfir.declare %arg2 dummy_scope %1 {uniq_name = "_QFFsb2Ej"} : (!fir.ref, !fir.dscope) -> (!fir.ref, !fir.ref) + %4:2 = hlfir.declare %arg0 dummy_scope %1 {uniq_name = "_QFFsb2Ex"} : (!fir.box>, !fir.dscope) -> (!fir.box>, !fir.box>) + %5 = fir.load %2#0 : !fir.ref + %6 = fir.convert %5 : (i32) -> i64 
+ %c1 = arith.constant 1 : index + %c1_0 = arith.constant 1 : index + %7:3 = fir.box_dims %4#1, %c1_0 : (!fir.box>, index) -> (index, index, index) + %c1_1 = arith.constant 1 : index + %c0 = arith.constant 0 : index + %8 = arith.subi %7#1, %c1 : index + %9 = arith.addi %8, %c1_1 : index + %10 = arith.divsi %9, %c1_1 : index + %11 = arith.cmpi sgt, %10, %c0 : index + %12 = arith.select %11, %10, %c0 : index + %13 = fir.load %3#0 : !fir.ref + %14 = fir.convert %13 : (i32) -> i64 + %15 = fir.shape %12 : (index) -> !fir.shape<1> + %16 = hlfir.designate %4#0 (%6, %c1:%7#1:%c1_1, %14) shape %15 : (!fir.box>, i64, index, index, index, i64, !fir.shape<1>) -> !fir.box> + %c100_i32 = arith.constant 100 : i32 + %17:2 = hlfir.copy_in %16 to %0 : (!fir.box>, !fir.ref>>>) -> (!fir.box>, i1) + %18 = fir.box_addr %17#0 : (!fir.box>) -> !fir.ref> + %19:3 = hlfir.associate %c100_i32 {adapt.valuebyref} : (i32) -> (!fir.ref, !fir.ref, i1) + fir.call @_QFPsb(%18, %19#1) fastmath : (!fir.ref>, !fir.ref) -> () + hlfir.copy_out %0, %17#1 to %16 : (!fir.ref>>>, i1, !fir.box>) -> () + hlfir.end_associate %19#1, %19#2 : !fir.ref, i1 + return +} + +// CHECK-LABEL: func.func private @_test_no_inline_copy_in( +// CHECK-SAME: %[[VAL_0:.*]]: !fir.box> {fir.bindc_name = "x"}, +// CHECK-SAME: %[[VAL_1:.*]]: !fir.ref {fir.bindc_name = "i"}, +// CHECK-SAME: %[[VAL_2:.*]]: !fir.ref {fir.bindc_name = "j"}) { +// CHECK: %[[VAL_3:.*]] = arith.constant 100 : i32 +// CHECK: %[[VAL_4:.*]] = arith.constant 0 : index +// CHECK: %[[VAL_5:.*]] = arith.constant 1 : index +// CHECK: %[[VAL_6:.*]] = fir.alloca !fir.box>> +// CHECK: %[[VAL_7:.*]] = fir.dummy_scope : !fir.dscope +// CHECK: %[[VAL_8:.*]]:2 = hlfir.declare %[[VAL_1:.*]] dummy_scope %[[VAL_7:.*]] {uniq_name = "_QFFsb2Ei"} : (!fir.ref, !fir.dscope) -> (!fir.ref, !fir.ref) +// CHECK: %[[VAL_9:.*]]:2 = hlfir.declare %[[VAL_2:.*]] dummy_scope %[[VAL_7:.*]] {uniq_name = "_QFFsb2Ej"} : (!fir.ref, !fir.dscope) -> (!fir.ref, !fir.ref) +// CHECK: 
%[[VAL_10:.*]]:2 = hlfir.declare %[[VAL_0:.*]] dummy_scope %[[VAL_7:.*]] {uniq_name = "_QFFsb2Ex"} : (!fir.box>, !fir.dscope) -> (!fir.box>, !fir.box>) +// CHECK: %[[VAL_11:.*]] = fir.load %[[VAL_8:.*]]#0 : !fir.ref +// CHECK: %[[VAL_12:.*]] = fir.convert %[[VAL_11:.*]] : (i32) -> i64 +// CHECK: %[[VAL_13:.*]]:3 = fir.box_dims %[[VAL_10:.*]]#1, %[[VAL_5:.*]] : (!fir.box>, index) -> (index, index, index) +// CHECK: %[[VAL_14:.*]] = arith.cmpi sgt, %[[VAL_13:.*]]#1, %[[VAL_4:.*]] : index +// CHECK: %[[VAL_15:.*]] = arith.select %[[VAL_14:.*]], %[[VAL_13:.*]]#1, %[[VAL_4:.*]] : index +// CHECK: %[[VAL_16:.*]] = fir.load %[[VAL_9:.*]]#0 : !fir.ref +// CHECK: %[[VAL_17:.*]] = fir.convert %[[VAL_16:.*]] : (i32) -> i64 +// CHECK: %[[VAL_18:.*]] = fir.shape %[[VAL_15:.*]] : (index) -> !fir.shape<1> +// CHECK: %[[VAL_19:.*]] = hlfir.designate %[[VAL_10:.*]]#0 (%[[VAL_12:.*]], %[[VAL_5:.*]]:%[[VAL_13:.*]]#1:%[[VAL_5:.*]], %[[VAL_17:.*]]) shape %[[VAL_18:.*]] : (!fir.box>, i64, index, index, index, i64, !fir.shape<1>) -> !fir.box> +// CHECK: %[[VAL_20:.*]]:2 = hlfir.copy_in %[[VAL_19:.*]] to %[[VAL_6:.*]] : (!fir.box>, !fir.ref>>>) -> (!fir.box>, i1) +// CHECK: %[[VAL_21:.*]] = fir.box_addr %[[VAL_20:.*]]#0 : (!fir.box>) -> !fir.ref> +// CHECK: %[[VAL_22:.*]]:3 = hlfir.associate %[[VAL_3:.*]] {adapt.valuebyref} : (i32) -> (!fir.ref, !fir.ref, i1) +// CHECK: fir.call @_QFPsb(%[[VAL_21:.*]], %[[VAL_22:.*]]#1) fastmath : (!fir.ref>, !fir.ref) -> () +// CHECK: hlfir.copy_out %[[VAL_6:.*]], %[[VAL_20:.*]]#1 to %[[VAL_19:.*]] : (!fir.ref>>>, i1, !fir.box>) -> () +// CHECK: hlfir.end_associate %[[VAL_22:.*]]#1, %[[VAL_22:.*]]#2 : !fir.ref, i1 +// CHECK: return +// CHECK: } From 63f66ae55347275c3f42c456a70dfbb688836fe6 Mon Sep 17 00:00:00 2001 From: Kajetan Puchalski Date: Wed, 28 May 2025 13:44:53 +0000 Subject: [PATCH 5/7] Support arrays behind a pointer, add metadata to disable vectorizing --- .../flang/Optimizer/Builder/HLFIRTools.h | 8 ++- 
flang/lib/Optimizer/Builder/HLFIRTools.cpp | 13 +++- .../HLFIR/Transforms/InlineHLFIRCopyIn.cpp | 66 ++++++++++--------- flang/test/HLFIR/inline-hlfir-copy-in.fir | 6 +- 4 files changed, 55 insertions(+), 38 deletions(-) diff --git a/flang/include/flang/Optimizer/Builder/HLFIRTools.h b/flang/include/flang/Optimizer/Builder/HLFIRTools.h index ed00cec04dc39..2cbad6e268a38 100644 --- a/flang/include/flang/Optimizer/Builder/HLFIRTools.h +++ b/flang/include/flang/Optimizer/Builder/HLFIRTools.h @@ -374,12 +374,14 @@ struct LoopNest { /// loop constructs currently. LoopNest genLoopNest(mlir::Location loc, fir::FirOpBuilder &builder, mlir::ValueRange extents, bool isUnordered = false, - bool emitWorkshareLoop = false); + bool emitWorkshareLoop = false, + bool couldVectorize = true); inline LoopNest genLoopNest(mlir::Location loc, fir::FirOpBuilder &builder, mlir::Value shape, bool isUnordered = false, - bool emitWorkshareLoop = false) { + bool emitWorkshareLoop = false, + bool couldVectorize = true) { return genLoopNest(loc, builder, getIndexExtents(loc, builder, shape), - isUnordered, emitWorkshareLoop); + isUnordered, emitWorkshareLoop, couldVectorize); } /// The type of a callback that generates the body of a reduction diff --git a/flang/lib/Optimizer/Builder/HLFIRTools.cpp b/flang/lib/Optimizer/Builder/HLFIRTools.cpp index f24dc2caeedfc..14aae5d7118a1 100644 --- a/flang/lib/Optimizer/Builder/HLFIRTools.cpp +++ b/flang/lib/Optimizer/Builder/HLFIRTools.cpp @@ -21,6 +21,7 @@ #include "mlir/IR/IRMapping.h" #include "mlir/Support/LLVM.h" #include "llvm/ADT/TypeSwitch.h" +#include #include #include @@ -932,7 +933,8 @@ mlir::Value hlfir::inlineElementalOp( hlfir::LoopNest hlfir::genLoopNest(mlir::Location loc, fir::FirOpBuilder &builder, mlir::ValueRange extents, bool isUnordered, - bool emitWorkshareLoop) { + bool emitWorkshareLoop, + bool couldVectorize) { emitWorkshareLoop = emitWorkshareLoop && isUnordered; hlfir::LoopNest loopNest; assert(!extents.empty() && "must have 
at least one extent"); @@ -967,6 +969,15 @@ hlfir::LoopNest hlfir::genLoopNest(mlir::Location loc, auto ub = builder.createConvert(loc, indexType, extent); auto doLoop = builder.create(loc, one, ub, one, isUnordered); + if (!couldVectorize) { + mlir::LLVM::LoopVectorizeAttr va{mlir::LLVM::LoopVectorizeAttr::get( + builder.getContext(), + /*disable=*/builder.getBoolAttr(true), {}, {}, {}, {}, {}, {})}; + mlir::LLVM::LoopAnnotationAttr la = mlir::LLVM::LoopAnnotationAttr::get( + builder.getContext(), {}, /*vectorize=*/va, {}, /*unroll*/ {}, + /*unroll_and_jam*/ {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}); + doLoop.setLoopAnnotationAttr(la); + } loopNest.body = doLoop.getBody(); builder.setInsertionPointToStart(loopNest.body); // Reverse the indices so they are in column-major order. diff --git a/flang/lib/Optimizer/HLFIR/Transforms/InlineHLFIRCopyIn.cpp b/flang/lib/Optimizer/HLFIR/Transforms/InlineHLFIRCopyIn.cpp index 1e2aecaf535a0..d1cbe3241c07b 100644 --- a/flang/lib/Optimizer/HLFIR/Transforms/InlineHLFIRCopyIn.cpp +++ b/flang/lib/Optimizer/HLFIR/Transforms/InlineHLFIRCopyIn.cpp @@ -52,19 +52,15 @@ InlineCopyInConversion::matchAndRewrite(hlfir::CopyInOp copyIn, return rewriter.notifyMatchFailure(copyIn, "CopyInOp's data type is not trivial"); - if (fir::isPointerType(inputVariable.getType())) - return rewriter.notifyMatchFailure( - copyIn, "CopyInOp's input variable is a pointer"); - // There should be exactly one user of WasCopied - the corresponding // CopyOutOp. - if (copyIn.getWasCopied().getUses().empty()) - return rewriter.notifyMatchFailure(copyIn, - "CopyInOp's WasCopied has no uses"); + if (!copyIn.getWasCopied().hasOneUse()) + return rewriter.notifyMatchFailure( + copyIn, "CopyInOp's WasCopied has no single user"); // The copy out should always be present, either to actually copy or just // deallocate memory. 
auto copyOut = mlir::dyn_cast( - copyIn.getWasCopied().getUsers().begin().getCurrent().getUser()); + copyIn.getWasCopied().user_begin().getCurrent().getUser()); if (!copyOut) return rewriter.notifyMatchFailure(copyIn, @@ -77,28 +73,45 @@ InlineCopyInConversion::matchAndRewrite(hlfir::CopyInOp copyIn, inputVariable = hlfir::derefPointersAndAllocatables(loc, builder, inputVariable); - mlir::Type resultAddrType = copyIn.getCopiedIn().getType(); + mlir::Type sequenceType = + hlfir::getFortranElementOrSequenceType(inputVariable.getType()); + fir::BoxType resultBoxType = fir::BoxType::get(sequenceType); mlir::Value isContiguous = builder.create(loc, inputVariable); mlir::Operation::result_range results = builder - .genIfOp(loc, {resultAddrType, builder.getI1Type()}, isContiguous, + .genIfOp(loc, {resultBoxType, builder.getI1Type()}, isContiguous, /*withElseRegion=*/true) .genThen([&]() { - mlir::Value falseVal = builder.create( - loc, builder.getI1Type(), builder.getBoolAttr(false)); + mlir::Value result = inputVariable; + if (fir::isPointerType(inputVariable.getType())) { + auto boxAddr = builder.create(loc, inputVariable); + fir::ReferenceType refTy = fir::ReferenceType::get(sequenceType); + mlir::Value refVal = builder.createConvert(loc, refTy, boxAddr); + mlir::Value shape = hlfir::genShape(loc, builder, inputVariable); + result = builder.create(loc, resultBoxType, refVal, + shape); + } builder.create( - loc, mlir::ValueRange{inputVariable, falseVal}); + loc, mlir::ValueRange{result, builder.createBool(loc, false)}); }) .genElse([&] { - auto [temp, cleanup] = - hlfir::createTempFromMold(loc, builder, inputVariable); mlir::Value shape = hlfir::genShape(loc, builder, inputVariable); llvm::SmallVector extents = hlfir::getIndexExtents(loc, builder, shape); - hlfir::LoopNest loopNest = hlfir::genLoopNest( - loc, builder, extents, /*isUnordered=*/true, - flangomp::shouldUseWorkshareLowering(copyIn)); + llvm::StringRef tmpName{".tmp.copy_in"}; + llvm::SmallVector lenParams; 
+ mlir::Value alloc = builder.createHeapTemporary( + loc, sequenceType, tmpName, extents, lenParams); + + auto declareOp = builder.create( + loc, alloc, tmpName, shape, lenParams, + /*dummy_scope=*/nullptr); + hlfir::Entity temp{declareOp.getBase()}; + hlfir::LoopNest loopNest = + hlfir::genLoopNest(loc, builder, extents, /*isUnordered=*/true, + flangomp::shouldUseWorkshareLowering(copyIn), + /*couldVectorize=*/false); builder.setInsertionPointToStart(loopNest.body); hlfir::Entity elem = hlfir::getElementAt( loc, builder, inputVariable, loopNest.oneBasedIndices); @@ -117,12 +130,12 @@ InlineCopyInConversion::matchAndRewrite(hlfir::CopyInOp copyIn, fir::ReferenceType refTy = fir::ReferenceType::get(temp.getElementOrSequenceType()); mlir::Value refVal = builder.createConvert(loc, refTy, temp); - result = - builder.create(loc, resultAddrType, refVal); + result = builder.create(loc, resultBoxType, refVal, + shape); } - builder.create(loc, - mlir::ValueRange{result, cleanup}); + builder.create( + loc, mlir::ValueRange{result, builder.createBool(loc, true)}); }) .getResults(); @@ -140,16 +153,7 @@ InlineCopyInConversion::matchAndRewrite(hlfir::CopyInOp copyIn, }); rewriter.eraseOp(copyOut); - mlir::Value tempBox = copyIn.getTempBox(); - rewriter.replaceOp(copyIn, {addr, builder.genNot(loc, isContiguous)}); - - // The TempBox is only needed for flang-rt calls which we're no longer - // generating. It should have no uses left at this stage. 
- if (!tempBox.getUses().empty()) - return mlir::failure(); - rewriter.eraseOp(tempBox.getDefiningOp()); - return mlir::success(); } diff --git a/flang/test/HLFIR/inline-hlfir-copy-in.fir b/flang/test/HLFIR/inline-hlfir-copy-in.fir index 7140e93f19979..7a5b6e591f7c7 100644 --- a/flang/test/HLFIR/inline-hlfir-copy-in.fir +++ b/flang/test/HLFIR/inline-hlfir-copy-in.fir @@ -60,9 +60,9 @@ func.func private @_test_inline_copy_in(%arg0: !fir.box> { // CHECK: %[[VAL_21:.*]]:2 = fir.if %[[VAL_20:.*]] -> (!fir.box>, i1) { // CHECK: fir.result %[[VAL_19:.*]], %[[VAL_4:.*]] : !fir.box>, i1 // CHECK: } else { -// CHECK: %[[VAL_24:.*]] = fir.allocmem !fir.array, %[[VAL_15:.*]] {bindc_name = ".tmp", uniq_name = ""} -// CHECK: %[[VAL_25:.*]]:2 = hlfir.declare %[[VAL_24:.*]](%[[VAL_18:.*]]) {uniq_name = ".tmp"} : (!fir.heap>, !fir.shape<1>) -> (!fir.box>, !fir.heap>) -// CHECK: fir.do_loop %arg3 = %[[VAL_7:.*]] to %[[VAL_15:.*]] step %[[VAL_7:.*]] unordered { +// CHECK: %[[VAL_24:.*]] = fir.allocmem !fir.array, %[[VAL_15:.*]] {bindc_name = ".tmp.copy_in", uniq_name = ""} +// CHECK: %[[VAL_25:.*]]:2 = hlfir.declare %[[VAL_24:.*]](%[[VAL_18:.*]]) {uniq_name = ".tmp.copy_in"} : (!fir.heap>, !fir.shape<1>) -> (!fir.box>, !fir.heap>) +// CHECK: fir.do_loop %arg3 = %[[VAL_7:.*]] to %[[VAL_15:.*]] step %[[VAL_7:.*]] unordered attributes {loopAnnotation = #loop_annotation} { // CHECK: %[[VAL_26:.*]] = hlfir.designate %[[VAL_19:.*]] (%arg3) : (!fir.box>, index) -> !fir.ref // CHECK: %[[VAL_27:.*]] = fir.load %[[VAL_26:.*]] : !fir.ref // CHECK: %[[VAL_28:.*]] = hlfir.designate %[[VAL_25:.*]]#0 (%arg3) : (!fir.box>, index) -> !fir.ref From 837d45b0661c29661969407e6ede3f7a70e4739e Mon Sep 17 00:00:00 2001 From: Kajetan Puchalski Date: Thu, 29 May 2025 14:10:36 +0000 Subject: [PATCH 6/7] Keep the copy-out to deallocate the temporary Signed-off-by: Kajetan Puchalski --- .../HLFIR/Transforms/InlineHLFIRCopyIn.cpp | 20 +++++++------------ flang/test/HLFIR/inline-hlfir-copy-in.fir | 6 +----- 2 
files changed, 8 insertions(+), 18 deletions(-) diff --git a/flang/lib/Optimizer/HLFIR/Transforms/InlineHLFIRCopyIn.cpp b/flang/lib/Optimizer/HLFIR/Transforms/InlineHLFIRCopyIn.cpp index d1cbe3241c07b..0cad503afe16d 100644 --- a/flang/lib/Optimizer/HLFIR/Transforms/InlineHLFIRCopyIn.cpp +++ b/flang/lib/Optimizer/HLFIR/Transforms/InlineHLFIRCopyIn.cpp @@ -139,21 +139,15 @@ InlineCopyInConversion::matchAndRewrite(hlfir::CopyInOp copyIn, }) .getResults(); - mlir::OpResult addr = results[0]; + mlir::OpResult resultBox = results[0]; mlir::OpResult needsCleanup = results[1]; - builder.setInsertionPoint(copyOut); - builder.genIfOp(loc, {}, needsCleanup, /*withElseRegion=*/false).genThen([&] { - auto boxAddr = builder.create(loc, addr); - fir::HeapType heapType = - fir::HeapType::get(fir::BoxValue(addr).getBaseTy()); - mlir::Value heapVal = - builder.createConvert(loc, heapType, boxAddr.getResult()); - builder.create(loc, heapVal); - }); - rewriter.eraseOp(copyOut); - - rewriter.replaceOp(copyIn, {addr, builder.genNot(loc, isContiguous)}); + auto alloca = builder.create(loc, resultBox.getType()); + auto store = builder.create(loc, resultBox, alloca); + copyOut->setOperand(0, store.getMemref()); + copyOut->setOperand(1, needsCleanup); + + rewriter.replaceOp(copyIn, {resultBox, builder.genNot(loc, isContiguous)}); return mlir::success(); } diff --git a/flang/test/HLFIR/inline-hlfir-copy-in.fir b/flang/test/HLFIR/inline-hlfir-copy-in.fir index 7a5b6e591f7c7..c1d5e11939b7c 100644 --- a/flang/test/HLFIR/inline-hlfir-copy-in.fir +++ b/flang/test/HLFIR/inline-hlfir-copy-in.fir @@ -73,11 +73,7 @@ func.func private @_test_inline_copy_in(%arg0: !fir.box> { // CHECK: %[[VAL_22:.*]] = fir.box_addr %[[VAL_21:.*]]#0 : (!fir.box>) -> !fir.ref> // CHECK: %[[VAL_23:.*]]:3 = hlfir.associate %[[VAL_5:.*]] {adapt.valuebyref} : (i32) -> (!fir.ref, !fir.ref, i1) // CHECK: fir.call @_QFPsb(%[[VAL_22:.*]], %[[VAL_23:.*]]#0) fastmath : (!fir.ref>, !fir.ref) -> () -// CHECK: fir.if %[[VAL_21:.*]]#1 
{ -// CHECK: %[[VAL_24:.*]] = fir.box_addr %[[VAL_21:.*]]#0 : (!fir.box>) -> !fir.ref> -// CHECK: %[[VAL_25:.*]] = fir.convert %[[VAL_24:.*]] : (!fir.ref>) -> !fir.heap> -// CHECK: fir.freemem %[[VAL_25:.*]] : !fir.heap> -// CHECK: } +// CHECK: hlfir.copy_out %16, %15#1 : (!fir.ref>>, i1) -> () // CHECK: hlfir.end_associate %[[VAL_23:.*]]#1, %[[VAL_23:.*]]#2 : !fir.ref, i1 // CHECK: return // CHECK: } From 4c0e8e80d2a23b6afc8862966634b063b6bdfed4 Mon Sep 17 00:00:00 2001 From: Kajetan Puchalski Date: Fri, 30 May 2025 11:59:31 +0000 Subject: [PATCH 7/7] Use rebox, assumed-rank handling, expand tests Signed-off-by: Kajetan Puchalski --- .../HLFIR/Transforms/InlineHLFIRCopyIn.cpp | 17 +++-- flang/test/HLFIR/inline-hlfir-copy-in.fir | 64 +++++++++++++++++++ 2 files changed, 75 insertions(+), 6 deletions(-) diff --git a/flang/lib/Optimizer/HLFIR/Transforms/InlineHLFIRCopyIn.cpp b/flang/lib/Optimizer/HLFIR/Transforms/InlineHLFIRCopyIn.cpp index 0cad503afe16d..7e8acc515ee26 100644 --- a/flang/lib/Optimizer/HLFIR/Transforms/InlineHLFIRCopyIn.cpp +++ b/flang/lib/Optimizer/HLFIR/Transforms/InlineHLFIRCopyIn.cpp @@ -48,6 +48,7 @@ InlineCopyInConversion::matchAndRewrite(hlfir::CopyInOp copyIn, fir::FirOpBuilder builder(rewriter, copyIn.getOperation()); mlir::Location loc = copyIn.getLoc(); hlfir::Entity inputVariable{copyIn.getVar()}; + mlir::Type resultAddrType = copyIn.getCopiedIn().getType(); if (!fir::isa_trivial(inputVariable.getFortranElementType())) return rewriter.notifyMatchFailure(copyIn, "CopyInOp's data type is not trivial"); @@ -66,6 +67,10 @@ InlineCopyInConversion::matchAndRewrite(hlfir::CopyInOp copyIn, return rewriter.notifyMatchFailure(copyIn, "CopyInOp has no direct CopyOut"); + if (mlir::cast(resultAddrType).isAssumedRank()) + return rewriter.notifyMatchFailure(copyIn, + "The result array is assumed-rank"); + // Only inline the copy_in when copy_out does not need to be done, i.e. in // case of intent(in). 
if (copyOut.getVar()) @@ -85,12 +90,9 @@ InlineCopyInConversion::matchAndRewrite(hlfir::CopyInOp copyIn, .genThen([&]() { mlir::Value result = inputVariable; if (fir::isPointerType(inputVariable.getType())) { - auto boxAddr = builder.create(loc, inputVariable); - fir::ReferenceType refTy = fir::ReferenceType::get(sequenceType); - mlir::Value refVal = builder.createConvert(loc, refTy, boxAddr); - mlir::Value shape = hlfir::genShape(loc, builder, inputVariable); - result = builder.create(loc, resultBoxType, refVal, - shape); + result = builder.create( + loc, resultBoxType, inputVariable, mlir::Value{}, + mlir::Value{}); } builder.create( loc, mlir::ValueRange{result, builder.createBool(loc, false)}); @@ -142,10 +144,13 @@ InlineCopyInConversion::matchAndRewrite(hlfir::CopyInOp copyIn, mlir::OpResult resultBox = results[0]; mlir::OpResult needsCleanup = results[1]; + // Prepare the corresponding copyOut to free the temporary if it is required auto alloca = builder.create(loc, resultBox.getType()); auto store = builder.create(loc, resultBox, alloca); + rewriter.startOpModification(copyOut); copyOut->setOperand(0, store.getMemref()); copyOut->setOperand(1, needsCleanup); + rewriter.finalizeOpModification(copyOut); rewriter.replaceOp(copyIn, {resultBox, builder.genNot(loc, isContiguous)}); return mlir::success(); diff --git a/flang/test/HLFIR/inline-hlfir-copy-in.fir b/flang/test/HLFIR/inline-hlfir-copy-in.fir index c1d5e11939b7c..f3c4b38962a0c 100644 --- a/flang/test/HLFIR/inline-hlfir-copy-in.fir +++ b/flang/test/HLFIR/inline-hlfir-copy-in.fir @@ -34,6 +34,8 @@ func.func private @_test_inline_copy_in(%arg0: !fir.box> { return } +// CHECK: #loop_vectorize = #llvm.loop_vectorize +// CHECK: #loop_annotation = #llvm.loop_annotation // CHECK-LABEL: func.func private @_test_inline_copy_in( // CHECK-SAME: %[[VAL_0:.*]]: !fir.box> {fir.bindc_name = "x"}, // CHECK-SAME: %[[VAL_1:.*]]: !fir.ref {fir.bindc_name = "i"}, @@ -140,3 +142,65 @@ func.func private 
@_test_no_inline_copy_in(%arg0: !fir.box // CHECK: hlfir.end_associate %[[VAL_22:.*]]#1, %[[VAL_22:.*]]#2 : !fir.ref, i1 // CHECK: return // CHECK: } + +// Test not inlining optional dummy arguments (no direct copy-out) +func.func @_QPoptional_copy_in_out(%arg0: !fir.box> {fir.bindc_name = "x", fir.optional}) { + %false = arith.constant false + %0 = fir.alloca !fir.box>> + %1 = fir.dummy_scope : !fir.dscope + %2:2 = hlfir.declare %arg0 dummy_scope %1 {fortran_attrs = #fir.var_attrs, uniq_name = "_QFoptional_copy_in_outEx"} : (!fir.box>, !fir.dscope) -> (!fir.box>, !fir.box>) + %3 = fir.is_present %2#0 : (!fir.box>) -> i1 + %4:2 = fir.if %3 -> (!fir.ref>, i1) { + %5:2 = hlfir.copy_in %2#0 to %0 : (!fir.box>, !fir.ref>>>) -> (!fir.box>, i1) + %6 = fir.box_addr %5#0 : (!fir.box>) -> !fir.ref> + fir.result %6, %5#1 : !fir.ref>, i1 + } else { + %5 = fir.absent !fir.ref> + fir.result %5, %false : !fir.ref>, i1 + } + fir.call @_QPtakes_optional_explicit(%4#0) fastmath : (!fir.ref>) -> () + hlfir.copy_out %0, %4#1 : (!fir.ref>>>, i1) -> () + return +} + +// CHECK-LABEL: func.func @_QPoptional_copy_in_out( +// CHECK-SAME: %[[ARG_0:.*]]: !fir.box> {fir.bindc_name = "x", fir.optional}) { +// CHECK: %false = arith.constant false +// CHECK: %[[VAL_0:.*]] = fir.alloca !fir.box>> +// CHECK: %[[VAL_1:.*]] = fir.dummy_scope : !fir.dscope +// CHECK: %[[VAL_2:.*]]:2 = hlfir.declare %[[ARG_0:.*]] dummy_scope %[[VAL_1:.*]] {fortran_attrs = #fir.var_attrs, uniq_name = "_QFoptional_copy_in_outEx"} : (!fir.box>, !fir.dscope) -> (!fir.box>, !fir.box>) +// CHECK: %[[VAL_3:.*]] = fir.is_present %[[VAL_2:.*]]#0 : (!fir.box>) -> i1 +// CHECK: %[[VAL_4:.*]]:2 = fir.if %[[VAL_3:.*]] -> (!fir.ref>, i1) { +// CHECK: %[[VAL_5:.*]]:2 = hlfir.copy_in %[[VAL_2:.*]]#0 to %[[VAL_0:.*]] : (!fir.box>, !fir.ref>>>) -> (!fir.box>, i1) +// CHECK: %[[VAL_6:.*]] = fir.box_addr %[[VAL_5:.*]]#0 : (!fir.box>) -> !fir.ref> +// CHECK: fir.result %[[VAL_6:.*]], %[[VAL_5:.*]]#1 : !fir.ref>, i1 +// CHECK: } else { +// 
CHECK: %[[VAL_5:.*]] = fir.absent !fir.ref> +// CHECK: fir.result %[[VAL_5:.*]], %false : !fir.ref>, i1 +// CHECK: } +// CHECK: fir.call @_QPtakes_optional_explicit(%[[VAL_4:.*]]#0) fastmath : (!fir.ref>) -> () +// CHECK: hlfir.copy_out %[[VAL_0:.*]], %[[VAL_4:.*]]#1 : (!fir.ref>>>, i1) -> () +// CHECK: return +// CHECK: } + +// Test not inlining of assumed-rank arrays +func.func @_QPtest_copy_in_out_2(%arg0: !fir.box> {fir.bindc_name = "x"}) { + %0 = fir.alloca !fir.box>> + %1 = fir.dummy_scope : !fir.dscope + %2:2 = hlfir.declare %arg0 dummy_scope %1 {uniq_name = "_QFtest_copy_in_out_2Ex"} : (!fir.box>, !fir.dscope) -> (!fir.box>, !fir.box>) + %3:2 = hlfir.copy_in %2#0 to %0 : (!fir.box>, !fir.ref>>>) -> (!fir.box>, i1) + fir.call @_QPtakes_contiguous_intentin(%3#0) fastmath : (!fir.box>) -> () + hlfir.copy_out %0, %3#1 : (!fir.ref>>>, i1) -> () + return +} + +// CHECK-LABEL: func.func @_QPtest_copy_in_out_2( +// CHECK-SAME: %[[ARG_0]]: !fir.box> {fir.bindc_name = "x"}) { +// CHECK: %[[VAL_0]] = fir.alloca !fir.box>> +// CHECK: %[[VAL_1]] = fir.dummy_scope : !fir.dscope +// CHECK: %[[VAL_2]]:2 = hlfir.declare %[[ARG_0]] dummy_scope %[[VAL_1]] {uniq_name = "_QFtest_copy_in_out_2Ex"} : (!fir.box>, !fir.dscope) -> (!fir.box>, !fir.box>) +// CHECK: %[[VAL_3]]:2 = hlfir.copy_in %[[VAL_2]]#0 to %[[VAL_0]] : (!fir.box>, !fir.ref>>>) -> (!fir.box>, i1) +// CHECK: fir.call @_QPtakes_contiguous_intentin(%[[VAL_3]]#0) fastmath : (!fir.box>) -> () +// CHECK: hlfir.copy_out %[[VAL_0]], %[[VAL_3]]#1 : (!fir.ref>>>, i1) -> () +// CHECK: return +// CHECK: }