diff --git a/flang/include/flang/Optimizer/Builder/HLFIRTools.h b/flang/include/flang/Optimizer/Builder/HLFIRTools.h index efbd9e4f50d43..c8aad644bc784 100644 --- a/flang/include/flang/Optimizer/Builder/HLFIRTools.h +++ b/flang/include/flang/Optimizer/Builder/HLFIRTools.h @@ -366,6 +366,10 @@ struct LoopNest { /// Generate a fir.do_loop nest looping from 1 to extents[i]. /// \p isUnordered specifies whether the loops in the loop nest /// are unordered. +/// +/// NOTE: genLoopNestWithReductions() should be used in favor +/// of this method, though, it cannot generate OpenMP workshare +/// loop constructs currently. LoopNest genLoopNest(mlir::Location loc, fir::FirOpBuilder &builder, mlir::ValueRange extents, bool isUnordered = false, bool emitWorkshareLoop = false); @@ -376,6 +380,37 @@ inline LoopNest genLoopNest(mlir::Location loc, fir::FirOpBuilder &builder, isUnordered, emitWorkshareLoop); } +/// The type of a callback that generates the body of a reduction +/// loop nest. It takes a location and a builder, as usual. +/// In addition, the first set of values are the values of the loops' +/// induction variables. The second set of values are the values +/// of the reductions on entry to the innermost loop. +/// The callback must return the updated values of the reductions. +using ReductionLoopBodyGenerator = std::function( + mlir::Location, fir::FirOpBuilder &, mlir::ValueRange, mlir::ValueRange)>; + +/// Generate a loop nest loopong from 1 to \p extents[i] and reducing +/// a set of values. +/// \p isUnordered specifies whether the loops in the loop nest +/// are unordered. +/// \p reductionInits are the initial values of the reductions +/// on entry to the outermost loop. +/// \p genBody callback is repsonsible for generating the code +/// that updates the reduction values in the innermost loop. +/// +/// NOTE: the implementation of this function may decide +/// to perform the reductions on SSA or in memory. +/// In the latter case, this function is responsible for +/// allocating/loading/storing the reduction variables, +/// and making sure they have proper data sharing attributes +/// in case any parallel constructs are present around the point +/// of the loop nest insertion, or if the function decides +/// to use any worksharing loop constructs for the loop nest. +llvm::SmallVector genLoopNestWithReductions( + mlir::Location loc, fir::FirOpBuilder &builder, mlir::ValueRange extents, + mlir::ValueRange reductionInits, const ReductionLoopBodyGenerator &genBody, + bool isUnordered = false); + /// Inline the body of an hlfir.elemental at the current insertion point /// given a list of one based indices. This generates the computation /// of one element of the elemental expression. Return the YieldElementOp diff --git a/flang/lib/Optimizer/Builder/HLFIRTools.cpp b/flang/lib/Optimizer/Builder/HLFIRTools.cpp index 1bd950f2445ee..94238bc24e453 100644 --- a/flang/lib/Optimizer/Builder/HLFIRTools.cpp +++ b/flang/lib/Optimizer/Builder/HLFIRTools.cpp @@ -910,6 +910,56 @@ hlfir::LoopNest hlfir::genLoopNest(mlir::Location loc, return loopNest; } +llvm::SmallVector hlfir::genLoopNestWithReductions( + mlir::Location loc, fir::FirOpBuilder &builder, mlir::ValueRange extents, + mlir::ValueRange reductionInits, const ReductionLoopBodyGenerator &genBody, + bool isUnordered) { + assert(!extents.empty() && "must have at least one extent"); + // Build loop nest from column to row. + auto one = builder.create(loc, 1); + mlir::Type indexType = builder.getIndexType(); + unsigned dim = extents.size() - 1; + fir::DoLoopOp outerLoop = nullptr; + fir::DoLoopOp parentLoop = nullptr; + llvm::SmallVector oneBasedIndices; + oneBasedIndices.resize(dim + 1); + for (auto extent : llvm::reverse(extents)) { + auto ub = builder.createConvert(loc, indexType, extent); + + // The outermost loop takes reductionInits as the initial + // values of its iter-args. + // A child loop takes its iter-args from the region iter-args + // of its parent loop. + fir::DoLoopOp doLoop; + if (!parentLoop) { + doLoop = builder.create(loc, one, ub, one, isUnordered, + /*finalCountValue=*/false, + reductionInits); + } else { + doLoop = builder.create(loc, one, ub, one, isUnordered, + /*finalCountValue=*/false, + parentLoop.getRegionIterArgs()); + // Return the results of the child loop from its parent loop. + builder.create(loc, doLoop.getResults()); + } + + builder.setInsertionPointToStart(doLoop.getBody()); + // Reverse the indices so they are in column-major order. + oneBasedIndices[dim--] = doLoop.getInductionVar(); + if (!outerLoop) + outerLoop = doLoop; + parentLoop = doLoop; + } + + llvm::SmallVector reductionValues; + reductionValues = + genBody(loc, builder, oneBasedIndices, parentLoop.getRegionIterArgs()); + builder.setInsertionPointToEnd(parentLoop.getBody()); + builder.create(loc, reductionValues); + builder.setInsertionPointAfter(outerLoop); + return outerLoop->getResults(); +} + static fir::ExtendedValue translateVariableToExtendedValue( mlir::Location loc, fir::FirOpBuilder &builder, hlfir::Entity variable, bool forceHlfirBase = false, bool contiguousHint = false) { diff --git a/flang/lib/Optimizer/HLFIR/Transforms/SimplifyHLFIRIntrinsics.cpp b/flang/lib/Optimizer/HLFIR/Transforms/SimplifyHLFIRIntrinsics.cpp index b61f9767ccc2b..f58fde9eb5f36 100644 --- a/flang/lib/Optimizer/HLFIR/Transforms/SimplifyHLFIRIntrinsics.cpp +++ b/flang/lib/Optimizer/HLFIR/Transforms/SimplifyHLFIRIntrinsics.cpp @@ -105,34 +105,43 @@ class SumAsElementalConversion : public mlir::OpRewritePattern { mlir::PatternRewriter &rewriter) const override { mlir::Location loc = sum.getLoc(); fir::FirOpBuilder builder{rewriter, sum.getOperation()}; - hlfir::ExprType expr = mlir::dyn_cast(sum.getType()); - assert(expr && "expected an expression type for the result of hlfir.sum"); - mlir::Type elementType = expr.getElementType(); + mlir::Type elementType = hlfir::getFortranElementType(sum.getType()); hlfir::Entity array = hlfir::Entity{sum.getArray()}; mlir::Value mask = sum.getMask(); mlir::Value dim = sum.getDim(); - int64_t dimVal = fir::getIntIfConstant(dim).value_or(0); + bool isTotalReduction = hlfir::Entity{sum}.getRank() == 0; + int64_t dimVal = + isTotalReduction ? 0 : fir::getIntIfConstant(dim).value_or(0); mlir::Value resultShape, dimExtent; - std::tie(resultShape, dimExtent) = - genResultShape(loc, builder, array, dimVal); + llvm::SmallVector arrayExtents; + if (isTotalReduction) + arrayExtents = genArrayExtents(loc, builder, array); + else + std::tie(resultShape, dimExtent) = + genResultShapeForPartialReduction(loc, builder, array, dimVal); + + // If the mask is present and is a scalar, then we'd better load its value + // outside of the reduction loop making the loop unswitching easier. + mlir::Value isPresentPred, maskValue; + if (mask) { + if (mlir::isa(mask.getType())) { + // MASK represented by a box might be dynamically optional, + // so we have to check for its presence before accessing it. + isPresentPred = + builder.create(loc, builder.getI1Type(), mask); + } + + if (hlfir::Entity{mask}.isScalar()) + maskValue = genMaskValue(loc, builder, mask, isPresentPred, {}); + } auto genKernel = [&](mlir::Location loc, fir::FirOpBuilder &builder, mlir::ValueRange inputIndices) -> hlfir::Entity { // Loop over all indices in the DIM dimension, and reduce all values. - // We do not need to create the reduction loop always: if we can - // slice the input array given the inputIndices, then we can - // just apply a new SUM operation (total reduction) to the slice. - // For the time being, generate the explicit loop because the slicing - // requires generating an elemental operation for the input array - // (and the mask, if present). - // TODO: produce the slices and new SUM after adding a pattern - // for expanding total reduction SUM case. - mlir::Type indexType = builder.getIndexType(); - auto one = builder.createIntegerConstant(loc, indexType, 1); - auto ub = builder.createConvert(loc, indexType, dimExtent); + // If DIM is not present, do total reduction. // Initial value for the reduction. - mlir::Value initValue = genInitValue(loc, builder, elementType); + mlir::Value reductionInitValue = genInitValue(loc, builder, elementType); // The reduction loop may be unordered if FastMathFlags::reassoc // transformations are allowed. The integer reduction is always @@ -141,79 +150,83 @@ class SumAsElementalConversion : public mlir::OpRewritePattern { static_cast(sum.getFastmath() & mlir::arith::FastMathFlags::reassoc); - // If the mask is present and is a scalar, then we'd better load its value - // outside of the reduction loop making the loop unswitching easier. - // Maybe it is worth hoisting it from the elemental operation as well. - mlir::Value isPresentPred, maskValue; - if (mask) { - if (mlir::isa(mask.getType())) { - // MASK represented by a box might be dynamically optional, - // so we have to check for its presence before accessing it. - isPresentPred = - builder.create(loc, builder.getI1Type(), mask); + llvm::SmallVector extents; + if (isTotalReduction) + extents = arrayExtents; + else + extents.push_back( + builder.createConvert(loc, builder.getIndexType(), dimExtent)); + + auto genBody = [&](mlir::Location loc, fir::FirOpBuilder &builder, + mlir::ValueRange oneBasedIndices, + mlir::ValueRange reductionArgs) + -> llvm::SmallVector { + // Generate the reduction loop-nest body. + // The initial reduction value in the innermost loop + // is passed via reductionArgs[0]. + llvm::SmallVector indices; + if (isTotalReduction) { + indices = oneBasedIndices; + } else { + indices = inputIndices; + indices.insert(indices.begin() + dimVal - 1, oneBasedIndices[0]); } - if (hlfir::Entity{mask}.isScalar()) - maskValue = genMaskValue(loc, builder, mask, isPresentPred, {}); - } + mlir::Value reductionValue = reductionArgs[0]; + fir::IfOp ifOp; + if (mask) { + // Make the reduction value update conditional on the value + // of the mask. + if (!maskValue) { + // If the mask is an array, use the elemental and the loop indices + // to address the proper mask element. + maskValue = + genMaskValue(loc, builder, mask, isPresentPred, indices); + } + mlir::Value isUnmasked = builder.create( + loc, builder.getI1Type(), maskValue); + ifOp = builder.create(loc, elementType, isUnmasked, + /*withElseRegion=*/true); + // In the 'else' block return the current reduction value. + builder.setInsertionPointToStart(&ifOp.getElseRegion().front()); + builder.create(loc, reductionValue); + + // In the 'then' block do the actual addition. + builder.setInsertionPointToStart(&ifOp.getThenRegion().front()); + } - // NOTE: the outer elemental operation may be lowered into - // omp.workshare.loop_wrapper/omp.loop_nest later, so the reduction - // loop may appear disjoint from the workshare loop nest. - // Moreover, the inner loop is not strictly nested (due to the reduction - // starting value initialization), and the above omp dialect operations - // cannot produce results. - // It is unclear what we should do about it yet. - auto doLoop = builder.create( - loc, one, ub, one, isUnordered, /*finalCountValue=*/false, - mlir::ValueRange{initValue}); - - // Address the input array using the reduction loop's IV - // for the DIM dimension. - mlir::Value iv = doLoop.getInductionVar(); - llvm::SmallVector indices{inputIndices}; - indices.insert(indices.begin() + dimVal - 1, iv); - - mlir::OpBuilder::InsertionGuard guard(builder); - builder.setInsertionPointToStart(doLoop.getBody()); - mlir::Value reductionValue = doLoop.getRegionIterArgs()[0]; - fir::IfOp ifOp; - if (mask) { - // Make the reduction value update conditional on the value - // of the mask. - if (!maskValue) { - // If the mask is an array, use the elemental and the loop indices - // to address the proper mask element. - maskValue = genMaskValue(loc, builder, mask, isPresentPred, indices); + hlfir::Entity element = + hlfir::getElementAt(loc, builder, array, indices); + hlfir::Entity elementValue = + hlfir::loadTrivialScalar(loc, builder, element); + // NOTE: we can use "Kahan summation" same way as the runtime + // (e.g. when fast-math is not allowed), but let's start with + // the simple version. + reductionValue = + genScalarAdd(loc, builder, reductionValue, elementValue); + + if (ifOp) { + builder.create(loc, reductionValue); + builder.setInsertionPointAfter(ifOp); + reductionValue = ifOp.getResult(0); } - mlir::Value isUnmasked = - builder.create(loc, builder.getI1Type(), maskValue); - ifOp = builder.create(loc, elementType, isUnmasked, - /*withElseRegion=*/true); - // In the 'else' block return the current reduction value. - builder.setInsertionPointToStart(&ifOp.getElseRegion().front()); - builder.create(loc, reductionValue); - - // In the 'then' block do the actual addition. - builder.setInsertionPointToStart(&ifOp.getThenRegion().front()); - } - hlfir::Entity element = hlfir::getElementAt(loc, builder, array, indices); - hlfir::Entity elementValue = - hlfir::loadTrivialScalar(loc, builder, element); - // NOTE: we can use "Kahan summation" same way as the runtime - // (e.g. when fast-math is not allowed), but let's start with - // the simple version. - reductionValue = genScalarAdd(loc, builder, reductionValue, elementValue); - builder.create(loc, reductionValue); - - if (ifOp) { - builder.setInsertionPointAfter(ifOp); - builder.create(loc, ifOp.getResult(0)); - } + return {reductionValue}; + }; - return hlfir::Entity{doLoop.getResult(0)}; + llvm::SmallVector reductionFinalValues = + hlfir::genLoopNestWithReductions(loc, builder, extents, + {reductionInitValue}, genBody, + isUnordered); + return hlfir::Entity{reductionFinalValues[0]}; }; + + if (isTotalReduction) { + hlfir::Entity result = genKernel(loc, builder, mlir::ValueRange{}); + rewriter.replaceOp(sum, result); + return mlir::success(); + } + hlfir::ElementalOp elementalOp = hlfir::genElementalOp( loc, builder, elementType, resultShape, {}, genKernel, /*isUnordered=*/true, /*polymorphicMold=*/nullptr, @@ -229,20 +242,29 @@ class SumAsElementalConversion : public mlir::OpRewritePattern { } private: + static llvm::SmallVector + genArrayExtents(mlir::Location loc, fir::FirOpBuilder &builder, + hlfir::Entity array) { + mlir::Value inShape = hlfir::genShape(loc, builder, array); + llvm::SmallVector inExtents = + hlfir::getExplicitExtentsFromShape(inShape, builder); + if (inShape.getUses().empty()) + inShape.getDefiningOp()->erase(); + return inExtents; + } + // Return fir.shape specifying the shape of the result // of a SUM reduction with DIM=dimVal. The second return value // is the extent of the DIM dimension. static std::tuple - genResultShape(mlir::Location loc, fir::FirOpBuilder &builder, - hlfir::Entity array, int64_t dimVal) { - mlir::Value inShape = hlfir::genShape(loc, builder, array); + genResultShapeForPartialReduction(mlir::Location loc, + fir::FirOpBuilder &builder, + hlfir::Entity array, int64_t dimVal) { llvm::SmallVector inExtents = - hlfir::getExplicitExtentsFromShape(inShape, builder); + genArrayExtents(loc, builder, array); assert(dimVal > 0 && dimVal <= static_cast(inExtents.size()) && "DIM must be present and a positive constant not exceeding " "the array's rank"); - if (inShape.getUses().empty()) - inShape.getDefiningOp()->erase(); mlir::Value dimExtent = inExtents[dimVal - 1]; inExtents.erase(inExtents.begin() + dimVal - 1); @@ -355,22 +377,22 @@ class SimplifyHLFIRIntrinsics target.addDynamicallyLegalOp([](hlfir::SumOp sum) { if (!simplifySum) return true; - if (mlir::Value dim = sum.getDim()) { - if (auto dimVal = fir::getIntIfConstant(dim)) { - if (!fir::isa_trivial(sum.getType())) { - // Ignore the case SUM(a, DIM=X), where 'a' is a 1D array. - // It is only legal when X is 1, and it should probably be - // canonicalized into SUM(a). - fir::SequenceType arrayTy = mlir::cast( - hlfir::getFortranElementOrSequenceType( - sum.getArray().getType())); - if (*dimVal > 0 && *dimVal <= arrayTy.getDimension()) { - // Ignore SUMs with illegal DIM values. - // They may appear in dead code, - // and they do not have to be converted. - return false; - } - } + + // Always inline total reductions. + if (hlfir::Entity{sum}.getRank() == 0) + return false; + mlir::Value dim = sum.getDim(); + if (!dim) + return false; + + if (auto dimVal = fir::getIntIfConstant(dim)) { + fir::SequenceType arrayTy = mlir::cast( + hlfir::getFortranElementOrSequenceType(sum.getArray().getType())); + if (*dimVal > 0 && *dimVal <= arrayTy.getDimension()) { + // Ignore SUMs with illegal DIM values. + // They may appear in dead code, + // and they do not have to be converted. + return false; } } return true; diff --git a/flang/test/HLFIR/simplify-hlfir-intrinsics-sum.fir b/flang/test/HLFIR/simplify-hlfir-intrinsics-sum.fir index 54a592a66670f..bb406a6ad359d 100644 --- a/flang/test/HLFIR/simplify-hlfir-intrinsics-sum.fir +++ b/flang/test/HLFIR/simplify-hlfir-intrinsics-sum.fir @@ -14,9 +14,9 @@ func.func @sum_box_known_extents(%arg0: !fir.box>) { // CHECK: %[[VAL_4:.*]] = fir.shape %[[VAL_2]] : (index) -> !fir.shape<1> // CHECK: %[[VAL_5:.*]] = hlfir.elemental %[[VAL_4]] unordered : (!fir.shape<1>) -> !hlfir.expr<2xi32> { // CHECK: ^bb0(%[[VAL_6:.*]]: index): -// CHECK: %[[VAL_7:.*]] = arith.constant 1 : index -// CHECK: %[[VAL_8:.*]] = arith.constant 0 : i32 -// CHECK: %[[VAL_9:.*]] = fir.do_loop %[[VAL_10:.*]] = %[[VAL_7]] to %[[VAL_3]] step %[[VAL_7]] unordered iter_args(%[[VAL_11:.*]] = %[[VAL_8]]) -> (i32) { +// CHECK: %[[VAL_7:.*]] = arith.constant 0 : i32 +// CHECK: %[[VAL_8:.*]] = arith.constant 1 : index +// CHECK: %[[VAL_9:.*]] = fir.do_loop %[[VAL_10:.*]] = %[[VAL_8]] to %[[VAL_3]] step %[[VAL_8]] unordered iter_args(%[[VAL_11:.*]] = %[[VAL_7]]) -> (i32) { // CHECK: %[[VAL_12:.*]] = arith.constant 0 : index // CHECK: %[[VAL_13:.*]]:3 = fir.box_dims %[[VAL_0]], %[[VAL_12]] : (!fir.box>, index) -> (index, index, index) // CHECK: %[[VAL_14:.*]] = arith.constant 1 : index @@ -50,9 +50,9 @@ func.func @sum_expr_known_extents(%arg0: !hlfir.expr<2x3xi32>) { // CHECK: %[[VAL_4:.*]] = fir.shape %[[VAL_3]] : (index) -> !fir.shape<1> // CHECK: %[[VAL_5:.*]] = hlfir.elemental %[[VAL_4]] unordered : (!fir.shape<1>) -> !hlfir.expr<3xi32> { // CHECK: ^bb0(%[[VAL_6:.*]]: index): -// CHECK: %[[VAL_7:.*]] = arith.constant 1 : index -// CHECK: %[[VAL_8:.*]] = arith.constant 0 : i32 -// CHECK: %[[VAL_9:.*]] = fir.do_loop %[[VAL_10:.*]] = %[[VAL_7]] to %[[VAL_2]] step %[[VAL_7]] unordered iter_args(%[[VAL_11:.*]] = %[[VAL_8]]) -> (i32) { +// CHECK: %[[VAL_7:.*]] = arith.constant 0 : i32 +// CHECK: %[[VAL_8:.*]] = arith.constant 1 : index +// CHECK: %[[VAL_9:.*]] = fir.do_loop %[[VAL_10:.*]] = %[[VAL_8]] to %[[VAL_2]] step %[[VAL_8]] unordered iter_args(%[[VAL_11:.*]] = %[[VAL_7]]) -> (i32) { // CHECK: %[[VAL_12:.*]] = hlfir.apply %[[VAL_0]], %[[VAL_10]], %[[VAL_6]] : (!hlfir.expr<2x3xi32>, index, index) -> i32 // CHECK: %[[VAL_13:.*]] = arith.addi %[[VAL_11]], %[[VAL_12]] : i32 // CHECK: fir.result %[[VAL_13]] : i32 @@ -77,12 +77,12 @@ func.func @sum_box_unknown_extent1(%arg0: !fir.box>> // CHECK: %[[VAL_5:.*]] = fir.shape %[[VAL_4]] : (index) -> !fir.shape<1> // CHECK: %[[VAL_6:.*]] = hlfir.elemental %[[VAL_5]] unordered : (!fir.shape<1>) -> !hlfir.expr<3xcomplex> { // CHECK: ^bb0(%[[VAL_7:.*]]: index): -// CHECK: %[[VAL_8:.*]] = arith.constant 1 : index -// CHECK: %[[VAL_9:.*]] = arith.constant 0.000000e+00 : f64 -// CHECK: %[[VAL_10:.*]] = fir.undefined complex -// CHECK: %[[VAL_11:.*]] = fir.insert_value %[[VAL_10]], %[[VAL_9]], [0 : index] : (complex, f64) -> complex -// CHECK: %[[VAL_12:.*]] = fir.insert_value %[[VAL_11]], %[[VAL_9]], [1 : index] : (complex, f64) -> complex -// CHECK: %[[VAL_13:.*]] = fir.do_loop %[[VAL_14:.*]] = %[[VAL_8]] to %[[VAL_3]]#1 step %[[VAL_8]] iter_args(%[[VAL_15:.*]] = %[[VAL_12]]) -> (complex) { +// CHECK: %[[VAL_8:.*]] = arith.constant 0.000000e+00 : f64 +// CHECK: %[[VAL_9:.*]] = fir.undefined complex +// CHECK: %[[VAL_10:.*]] = fir.insert_value %[[VAL_9]], %[[VAL_8]], [0 : index] : (complex, f64) -> complex +// CHECK: %[[VAL_11:.*]] = fir.insert_value %[[VAL_10]], %[[VAL_8]], [1 : index] : (complex, f64) -> complex +// CHECK: %[[VAL_12:.*]] = arith.constant 1 : index +// CHECK: %[[VAL_13:.*]] = fir.do_loop %[[VAL_14:.*]] = %[[VAL_12]] to %[[VAL_3]]#1 step %[[VAL_12]] iter_args(%[[VAL_15:.*]] = %[[VAL_11]]) -> (complex) { // CHECK: %[[VAL_16:.*]] = arith.constant 0 : index // CHECK: %[[VAL_17:.*]]:3 = fir.box_dims %[[VAL_0]], %[[VAL_16]] : (!fir.box>>, index) -> (index, index, index) // CHECK: %[[VAL_18:.*]] = arith.constant 1 : index @@ -116,12 +116,12 @@ func.func @sum_box_unknown_extent2(%arg0: !fir.box>> // CHECK: %[[VAL_5:.*]] = fir.shape %[[VAL_3]]#1 : (index) -> !fir.shape<1> // CHECK: %[[VAL_6:.*]] = hlfir.elemental %[[VAL_5]] unordered : (!fir.shape<1>) -> !hlfir.expr> { // CHECK: ^bb0(%[[VAL_7:.*]]: index): -// CHECK: %[[VAL_8:.*]] = arith.constant 1 : index -// CHECK: %[[VAL_9:.*]] = arith.constant 0.000000e+00 : f64 -// CHECK: %[[VAL_10:.*]] = fir.undefined complex -// CHECK: %[[VAL_11:.*]] = fir.insert_value %[[VAL_10]], %[[VAL_9]], [0 : index] : (complex, f64) -> complex -// CHECK: %[[VAL_12:.*]] = fir.insert_value %[[VAL_11]], %[[VAL_9]], [1 : index] : (complex, f64) -> complex -// CHECK: %[[VAL_13:.*]] = fir.do_loop %[[VAL_14:.*]] = %[[VAL_8]] to %[[VAL_4]] step %[[VAL_8]] iter_args(%[[VAL_15:.*]] = %[[VAL_12]]) -> (complex) { +// CHECK: %[[VAL_8:.*]] = arith.constant 0.000000e+00 : f64 +// CHECK: %[[VAL_9:.*]] = fir.undefined complex +// CHECK: %[[VAL_10:.*]] = fir.insert_value %[[VAL_9]], %[[VAL_8]], [0 : index] : (complex, f64) -> complex +// CHECK: %[[VAL_11:.*]] = fir.insert_value %[[VAL_10]], %[[VAL_8]], [1 : index] : (complex, f64) -> complex +// CHECK: %[[VAL_12:.*]] = arith.constant 1 : index +// CHECK: %[[VAL_13:.*]] = fir.do_loop %[[VAL_14:.*]] = %[[VAL_12]] to %[[VAL_4]] step %[[VAL_12]] iter_args(%[[VAL_15:.*]] = %[[VAL_11]]) -> (complex) { // CHECK: %[[VAL_16:.*]] = arith.constant 0 : index // CHECK: %[[VAL_17:.*]]:3 = fir.box_dims %[[VAL_0]], %[[VAL_16]] : (!fir.box>>, index) -> (index, index, index) // CHECK: %[[VAL_18:.*]] = arith.constant 1 : index @@ -148,7 +148,7 @@ func.func @sum_expr_unknown_extent1(%arg0: !hlfir.expr) { return } // CHECK-LABEL: func.func @sum_expr_unknown_extent1( -// CHECK-SAME: %[[VAL_0:.*]]: !hlfir.expr) { +// CHECK-SAME: %[[VAL_0:.*]]: !hlfir.expr) { // CHECK: %[[VAL_1:.*]] = arith.constant 1 : i32 // CHECK: %[[VAL_2:.*]] = hlfir.shape_of %[[VAL_0]] : (!hlfir.expr) -> !fir.shape<2> // CHECK: %[[VAL_3:.*]] = hlfir.get_extent %[[VAL_2]] {dim = 0 : index} : (!fir.shape<2>) -> index @@ -156,9 +156,9 @@ func.func @sum_expr_unknown_extent1(%arg0: !hlfir.expr) { // CHECK: %[[VAL_5:.*]] = fir.shape %[[VAL_4]] : (index) -> !fir.shape<1> // CHECK: %[[VAL_6:.*]] = hlfir.elemental %[[VAL_5]] unordered : (!fir.shape<1>) -> !hlfir.expr<3xf32> { // CHECK: ^bb0(%[[VAL_7:.*]]: index): -// CHECK: %[[VAL_8:.*]] = arith.constant 1 : index -// CHECK: %[[VAL_9:.*]] = arith.constant 0.000000e+00 : f32 -// CHECK: %[[VAL_10:.*]] = fir.do_loop %[[VAL_11:.*]] = %[[VAL_8]] to %[[VAL_3]] step %[[VAL_8]] iter_args(%[[VAL_12:.*]] = %[[VAL_9]]) -> (f32) { +// CHECK: %[[VAL_8:.*]] = arith.constant 0.000000e+00 : f32 +// CHECK: %[[VAL_9:.*]] = arith.constant 1 : index +// CHECK: %[[VAL_10:.*]] = fir.do_loop %[[VAL_11:.*]] = %[[VAL_9]] to %[[VAL_3]] step %[[VAL_9]] iter_args(%[[VAL_12:.*]] = %[[VAL_8]]) -> (f32) { // CHECK: %[[VAL_13:.*]] = hlfir.apply %[[VAL_0]], %[[VAL_11]], %[[VAL_7]] : (!hlfir.expr, index, index) -> f32 // CHECK: %[[VAL_14:.*]] = arith.addf %[[VAL_12]], %[[VAL_13]] : f32 // CHECK: fir.result %[[VAL_14]] : f32 @@ -174,7 +174,7 @@ func.func @sum_expr_unknown_extent2(%arg0: !hlfir.expr) { return } // CHECK-LABEL: func.func @sum_expr_unknown_extent2( -// CHECK-SAME: %[[VAL_0:.*]]: !hlfir.expr) { +// CHECK-SAME: %[[VAL_0:.*]]: !hlfir.expr) { // CHECK: %[[VAL_1:.*]] = arith.constant 2 : i32 // CHECK: %[[VAL_2:.*]] = hlfir.shape_of %[[VAL_0]] : (!hlfir.expr) -> !fir.shape<2> // CHECK: %[[VAL_3:.*]] = hlfir.get_extent %[[VAL_2]] {dim = 0 : index} : (!fir.shape<2>) -> index @@ -182,9 +182,9 @@ func.func @sum_expr_unknown_extent2(%arg0: !hlfir.expr) { // CHECK: %[[VAL_5:.*]] = fir.shape %[[VAL_3]] : (index) -> !fir.shape<1> // CHECK: %[[VAL_6:.*]] = hlfir.elemental %[[VAL_5]] unordered : (!fir.shape<1>) -> !hlfir.expr { // CHECK: ^bb0(%[[VAL_7:.*]]: index): -// CHECK: %[[VAL_8:.*]] = arith.constant 1 : index -// CHECK: %[[VAL_9:.*]] = arith.constant 0.000000e+00 : f32 -// CHECK: %[[VAL_10:.*]] = fir.do_loop %[[VAL_11:.*]] = %[[VAL_8]] to %[[VAL_4]] step %[[VAL_8]] iter_args(%[[VAL_12:.*]] = %[[VAL_9]]) -> (f32) { +// CHECK: %[[VAL_8:.*]] = arith.constant 0.000000e+00 : f32 +// CHECK: %[[VAL_9:.*]] = arith.constant 1 : index +// CHECK: %[[VAL_10:.*]] = fir.do_loop %[[VAL_11:.*]] = %[[VAL_9]] to %[[VAL_4]] step %[[VAL_9]] iter_args(%[[VAL_12:.*]] = %[[VAL_8]]) -> (f32) { // CHECK: %[[VAL_13:.*]] = hlfir.apply %[[VAL_0]], %[[VAL_7]], %[[VAL_11]] : (!hlfir.expr, index, index) -> f32 // CHECK: %[[VAL_14:.*]] = arith.addf %[[VAL_12]], %[[VAL_13]] : f32 // CHECK: fir.result %[[VAL_14]] : f32 @@ -208,15 +208,15 @@ func.func @sum_scalar_mask(%arg0: !hlfir.expr, %mask: !fir.ref) -> index // CHECK: %[[VAL_5:.*]] = arith.constant 3 : index // CHECK: %[[VAL_6:.*]] = fir.shape %[[VAL_5]] : (index) -> !fir.shape<1> -// CHECK: %[[VAL_7:.*]] = hlfir.elemental %[[VAL_6]] unordered : (!fir.shape<1>) -> !hlfir.expr<3xf32> { -// CHECK: ^bb0(%[[VAL_8:.*]]: index): -// CHECK: %[[VAL_9:.*]] = arith.constant 1 : index +// CHECK: %[[VAL_7:.*]] = fir.load %[[VAL_1]] : !fir.ref> +// CHECK: %[[VAL_8:.*]] = hlfir.elemental %[[VAL_6]] unordered : (!fir.shape<1>) -> !hlfir.expr<3xf32> { +// CHECK: ^bb0(%[[VAL_9:.*]]: index): // CHECK: %[[VAL_10:.*]] = arith.constant 0.000000e+00 : f32 -// CHECK: %[[VAL_11:.*]] = fir.load %[[VAL_1]] : !fir.ref> -// CHECK: %[[VAL_12:.*]] = fir.do_loop %[[VAL_13:.*]] = %[[VAL_9]] to %[[VAL_4]] step %[[VAL_9]] iter_args(%[[VAL_14:.*]] = %[[VAL_10]]) -> (f32) { -// CHECK: %[[VAL_15:.*]] = fir.convert %[[VAL_11]] : (!fir.logical<1>) -> i1 +// CHECK: %[[VAL_11:.*]] = arith.constant 1 : index +// CHECK: %[[VAL_12:.*]] = fir.do_loop %[[VAL_13:.*]] = %[[VAL_11]] to %[[VAL_4]] step %[[VAL_11]] iter_args(%[[VAL_14:.*]] = %[[VAL_10]]) -> (f32) { +// CHECK: %[[VAL_15:.*]] = fir.convert %[[VAL_7]] : (!fir.logical<1>) -> i1 // CHECK: %[[VAL_16:.*]] = fir.if %[[VAL_15]] -> (f32) { -// CHECK: %[[VAL_17:.*]] = hlfir.apply %[[VAL_0]], %[[VAL_13]], %[[VAL_8]] : (!hlfir.expr, index, index) -> f32 +// CHECK: %[[VAL_17:.*]] = hlfir.apply %[[VAL_0]], %[[VAL_13]], %[[VAL_9]] : (!hlfir.expr, index, index) -> f32 // CHECK: %[[VAL_18:.*]] = arith.addf %[[VAL_14]], %[[VAL_17]] : f32 // CHECK: fir.result %[[VAL_18]] : f32 // CHECK: } else { @@ -243,24 +243,24 @@ func.func @sum_scalar_boxed_mask(%arg0: !hlfir.expr, %mask: !fir.box) -> index // CHECK: %[[VAL_5:.*]] = arith.constant 3 : index // CHECK: %[[VAL_6:.*]] = fir.shape %[[VAL_5]] : (index) -> !fir.shape<1> -// CHECK: %[[VAL_7:.*]] = hlfir.elemental %[[VAL_6]] unordered : (!fir.shape<1>) -> !hlfir.expr<3xf32> { -// CHECK: ^bb0(%[[VAL_8:.*]]: index): -// CHECK: %[[VAL_9:.*]] = arith.constant 1 : index -// CHECK: %[[VAL_10:.*]] = arith.constant 0.000000e+00 : f32 -// CHECK: %[[VAL_11:.*]] = fir.is_present %[[VAL_1]] : (!fir.box>) -> i1 -// CHECK: %[[VAL_12:.*]] = fir.if %[[VAL_11]] -> (!fir.logical<1>) { -// CHECK: %[[VAL_13:.*]] = fir.box_addr %[[VAL_1]] : (!fir.box>) -> !fir.ref> -// CHECK: %[[VAL_14:.*]] = fir.load %[[VAL_13]] : !fir.ref> -// CHECK: fir.result %[[VAL_14]] : !fir.logical<1> -// CHECK: } else { -// CHECK: %[[VAL_15:.*]] = arith.constant true -// CHECK: %[[VAL_16:.*]] = fir.convert %[[VAL_15]] : (i1) -> !fir.logical<1> -// CHECK: fir.result %[[VAL_16]] : !fir.logical<1> -// CHECK: } -// CHECK: %[[VAL_17:.*]] = fir.do_loop %[[VAL_18:.*]] = %[[VAL_9]] to %[[VAL_4]] step %[[VAL_9]] iter_args(%[[VAL_19:.*]] = %[[VAL_10]]) -> (f32) { -// CHECK: %[[VAL_20:.*]] = fir.convert %[[VAL_12]] : (!fir.logical<1>) -> i1 +// CHECK: %[[VAL_7:.*]] = fir.is_present %[[VAL_1]] : (!fir.box>) -> i1 +// CHECK: %[[VAL_8:.*]] = fir.if %[[VAL_7]] -> (!fir.logical<1>) { +// CHECK: %[[VAL_9:.*]] = fir.box_addr %[[VAL_1]] : (!fir.box>) -> !fir.ref> +// CHECK: %[[VAL_10:.*]] = fir.load %[[VAL_9]] : !fir.ref> +// CHECK: fir.result %[[VAL_10]] : !fir.logical<1> +// CHECK: } else { +// CHECK: %[[VAL_11:.*]] = arith.constant true +// CHECK: %[[VAL_12:.*]] = fir.convert %[[VAL_11]] : (i1) -> !fir.logical<1> +// CHECK: fir.result %[[VAL_12]] : !fir.logical<1> +// CHECK: } +// CHECK: %[[VAL_13:.*]] = hlfir.elemental %[[VAL_6]] unordered : (!fir.shape<1>) -> !hlfir.expr<3xf32> { +// CHECK: ^bb0(%[[VAL_14:.*]]: index): +// CHECK: %[[VAL_15:.*]] = arith.constant 0.000000e+00 : f32 +// CHECK: %[[VAL_16:.*]] = arith.constant 1 : index +// CHECK: %[[VAL_17:.*]] = fir.do_loop %[[VAL_18:.*]] = %[[VAL_16]] to %[[VAL_4]] step %[[VAL_16]] iter_args(%[[VAL_19:.*]] = %[[VAL_15]]) -> (f32) { +// CHECK: %[[VAL_20:.*]] = fir.convert %[[VAL_8]] : (!fir.logical<1>) -> i1 // CHECK: %[[VAL_21:.*]] = fir.if %[[VAL_20]] -> (f32) { -// CHECK: %[[VAL_22:.*]] = hlfir.apply %[[VAL_0]], %[[VAL_18]], %[[VAL_8]] : (!hlfir.expr, index, index) -> f32 +// CHECK: %[[VAL_22:.*]] = hlfir.apply %[[VAL_0]], %[[VAL_18]], %[[VAL_14]] : (!hlfir.expr, index, index) -> f32 // CHECK: %[[VAL_23:.*]] = arith.addf %[[VAL_19]], %[[VAL_22]] : f32 // CHECK: fir.result %[[VAL_23]] : f32 // CHECK: } else { @@ -287,20 +287,20 @@ func.func @sum_array_mask(%arg0: !hlfir.expr, %mask: !fir.box) -> index // CHECK: %[[VAL_5:.*]] = arith.constant 3 : index // CHECK: %[[VAL_6:.*]] = fir.shape %[[VAL_4]] : (index) -> !fir.shape<1> -// CHECK: %[[VAL_7:.*]] = hlfir.elemental %[[VAL_6]] unordered : (!fir.shape<1>) -> !hlfir.expr { -// CHECK: ^bb0(%[[VAL_8:.*]]: index): -// CHECK: %[[VAL_9:.*]] = arith.constant 1 : index +// CHECK: %[[VAL_7:.*]] = fir.is_present %[[VAL_1]] : (!fir.box>>) -> i1 +// CHECK: %[[VAL_8:.*]] = hlfir.elemental %[[VAL_6]] unordered : (!fir.shape<1>) -> !hlfir.expr { +// CHECK: ^bb0(%[[VAL_9:.*]]: index): // CHECK: %[[VAL_10:.*]] = arith.constant 0.000000e+00 : f32 -// CHECK: %[[VAL_11:.*]] = fir.is_present %[[VAL_1]] : (!fir.box>>) -> i1 -// CHECK: %[[VAL_12:.*]] = fir.do_loop %[[VAL_13:.*]] = %[[VAL_9]] to %[[VAL_5]] step %[[VAL_9]] iter_args(%[[VAL_14:.*]] = %[[VAL_10]]) -> (f32) { -// CHECK: %[[VAL_15:.*]] = fir.if %[[VAL_11]] -> (!fir.logical<1>) { +// CHECK: %[[VAL_11:.*]] = arith.constant 1 : index +// CHECK: %[[VAL_12:.*]] = fir.do_loop %[[VAL_13:.*]] = %[[VAL_11]] to %[[VAL_5]] step %[[VAL_11]] iter_args(%[[VAL_14:.*]] = %[[VAL_10]]) -> (f32) { +// CHECK: %[[VAL_15:.*]] = fir.if %[[VAL_7]] -> (!fir.logical<1>) { // CHECK: %[[VAL_16:.*]] = arith.constant 0 : index // CHECK: %[[VAL_17:.*]]:3 = fir.box_dims %[[VAL_1]], %[[VAL_16]] : (!fir.box>>, index) -> (index, index, index) // CHECK: %[[VAL_18:.*]] = arith.constant 1 : index // CHECK: %[[VAL_19:.*]]:3 = fir.box_dims %[[VAL_1]], %[[VAL_18]] : (!fir.box>>, index) -> (index, index, index) // CHECK: %[[VAL_20:.*]] = arith.constant 1 : index // CHECK: %[[VAL_21:.*]] = arith.subi %[[VAL_17]]#0, %[[VAL_20]] : index -// CHECK: %[[VAL_22:.*]] = arith.addi %[[VAL_8]], %[[VAL_21]] : index +// CHECK: %[[VAL_22:.*]] = arith.addi %[[VAL_9]], %[[VAL_21]] : index // CHECK: %[[VAL_23:.*]] = arith.subi %[[VAL_19]]#0, %[[VAL_20]] : index // CHECK: %[[VAL_24:.*]] = arith.addi %[[VAL_13]], %[[VAL_23]] : index // CHECK: %[[VAL_25:.*]] = hlfir.designate %[[VAL_1]] (%[[VAL_22]], %[[VAL_24]]) : (!fir.box>>, index, index) -> !fir.ref> @@ -313,7 +313,7 @@ func.func @sum_array_mask(%arg0: !hlfir.expr, %mask: !fir.box) -> i1 // CHECK: %[[VAL_30:.*]] = fir.if %[[VAL_29]] -> (f32) { -// CHECK: %[[VAL_31:.*]] = hlfir.apply %[[VAL_0]], %[[VAL_8]], %[[VAL_13]] : (!hlfir.expr, index, index) -> f32 +// CHECK: %[[VAL_31:.*]] = hlfir.apply %[[VAL_0]], %[[VAL_9]], %[[VAL_13]] : (!hlfir.expr, index, index) -> f32 // CHECK: %[[VAL_32:.*]] = arith.addf %[[VAL_14]], %[[VAL_31]] : f32 // CHECK: fir.result %[[VAL_32]] : f32 // CHECK: } else { @@ -342,9 +342,9 @@ func.func @sum_array_expr_mask(%arg0: !hlfir.expr, %mask: !hlfir.expr !fir.shape<1> // CHECK: %[[VAL_7:.*]] = hlfir.elemental %[[VAL_6]] unordered : (!fir.shape<1>) -> !hlfir.expr { // CHECK: ^bb0(%[[VAL_8:.*]]: index): -// CHECK: %[[VAL_9:.*]] = arith.constant 1 : index -// CHECK: %[[VAL_10:.*]] = arith.constant 0.000000e+00 : f32 -// CHECK: %[[VAL_11:.*]] = fir.do_loop %[[VAL_12:.*]] = %[[VAL_9]] to %[[VAL_5]] step %[[VAL_9]] iter_args(%[[VAL_13:.*]] = %[[VAL_10]]) -> (f32) { +// CHECK: %[[VAL_9:.*]] = arith.constant 0.000000e+00 : f32 +// CHECK: %[[VAL_10:.*]] = arith.constant 1 : index +// CHECK: %[[VAL_11:.*]] = fir.do_loop %[[VAL_12:.*]] = %[[VAL_10]] to %[[VAL_5]] step %[[VAL_10]] iter_args(%[[VAL_13:.*]] = %[[VAL_9]]) -> (f32) { // CHECK: %[[VAL_14:.*]] = hlfir.apply %[[VAL_1]], %[[VAL_8]], %[[VAL_12]] : (!hlfir.expr>, index, index) -> !fir.logical<1> // CHECK: %[[VAL_15:.*]] = fir.convert %[[VAL_14]] : (!fir.logical<1>) -> i1 // CHECK: %[[VAL_16:.*]] = fir.if %[[VAL_15]] -> (f32) { @@ -375,9 +375,9 @@ func.func @sum_unordered_reduction(%arg0: !hlfir.expr<2x3xf32>) { // CHECK: %[[VAL_4:.*]] = fir.shape %[[VAL_3]] : (index) -> !fir.shape<1> // CHECK: %[[VAL_5:.*]] = hlfir.elemental %[[VAL_4]] unordered : (!fir.shape<1>) -> !hlfir.expr<3xf32> { // CHECK: ^bb0(%[[VAL_6:.*]]: index): -// CHECK: %[[VAL_7:.*]] = arith.constant 1 : index -// CHECK: %[[VAL_8:.*]] = arith.constant 0.000000e+00 : f32 -// CHECK: %[[VAL_9:.*]] = fir.do_loop %[[VAL_10:.*]] = %[[VAL_7]] to %[[VAL_2]] step %[[VAL_7]] unordered iter_args(%[[VAL_11:.*]] = %[[VAL_8]]) -> (f32) { +// CHECK: %[[VAL_7:.*]] = arith.constant 0.000000e+00 : f32 +// CHECK: %[[VAL_8:.*]] = arith.constant 1 : index +// CHECK: %[[VAL_9:.*]] = fir.do_loop %[[VAL_10:.*]] = %[[VAL_8]] to %[[VAL_2]] step %[[VAL_8]] unordered iter_args(%[[VAL_11:.*]] = %[[VAL_7]]) -> (f32) { // CHECK: %[[VAL_12:.*]] = hlfir.apply %[[VAL_0]], %[[VAL_10]], %[[VAL_6]] : (!hlfir.expr<2x3xf32>, index, index) -> f32 // CHECK: %[[VAL_13:.*]] = arith.addf %[[VAL_11]], %[[VAL_12]] fastmath : f32 // CHECK: fir.result %[[VAL_13]] : f32 @@ -387,28 +387,62 @@ func.func @sum_unordered_reduction(%arg0: !hlfir.expr<2x3xf32>) { // CHECK: return // CHECK: } -// negative: total reduction -func.func @sum_total_reduction(%arg0: !fir.box>) { +// total 1d reduction +func.func @sum_total_1d_reduction(%arg0: !fir.box>) { %cst = arith.constant 1 : i32 %res = hlfir.sum %arg0 dim %cst : (!fir.box>, i32) -> i32 return } -// CHECK-LABEL: func.func @sum_total_reduction( -// CHECK-SAME: %[[VAL_0:.*]]: !fir.box>) { +// CHECK-LABEL: func.func @sum_total_1d_reduction( +// CHECK-SAME: %[[VAL_0:.*]]: !fir.box>) { // CHECK: %[[VAL_1:.*]] = arith.constant 1 : i32 -// CHECK: %[[VAL_2:.*]] = hlfir.sum %[[VAL_0]] dim %[[VAL_1]] : (!fir.box>, i32) -> i32 +// CHECK: %[[VAL_2:.*]] = arith.constant 3 : index +// CHECK: %[[VAL_3:.*]] = arith.constant 0 : i32 +// CHECK: %[[VAL_4:.*]] = arith.constant 1 : index +// CHECK: %[[VAL_5:.*]] = fir.do_loop %[[VAL_6:.*]] = %[[VAL_4]] to %[[VAL_2]] step %[[VAL_4]] unordered iter_args(%[[VAL_7:.*]] = %[[VAL_3]]) -> (i32) { +// CHECK: %[[VAL_8:.*]] = arith.constant 0 : index +// CHECK: %[[VAL_9:.*]]:3 = fir.box_dims %[[VAL_0]], %[[VAL_8]] : (!fir.box>, index) -> (index, index, index) +// CHECK: %[[VAL_10:.*]] = arith.constant 1 : index +// CHECK: %[[VAL_11:.*]] = arith.subi %[[VAL_9]]#0, %[[VAL_10]] : index +// CHECK: %[[VAL_12:.*]] = arith.addi %[[VAL_6]], %[[VAL_11]] : index +// CHECK: %[[VAL_13:.*]] = hlfir.designate %[[VAL_0]] (%[[VAL_12]]) : (!fir.box>, index) -> !fir.ref +// CHECK: %[[VAL_14:.*]] = fir.load %[[VAL_13]] : !fir.ref +// CHECK: %[[VAL_15:.*]] = arith.addi %[[VAL_7]], %[[VAL_14]] : i32 +// CHECK: fir.result %[[VAL_15]] : i32 +// CHECK: } // CHECK: return // CHECK: } -// negative: non-const dim -func.func @sum_non_const_dim(%arg0: !fir.box>, %dim: i32) { - %res = hlfir.sum %arg0 dim %dim : (!fir.box>, i32) -> i32 +// total 2d reduction +func.func @sum_total_2d_reduction(%arg0: !fir.box>) { + %res = hlfir.sum %arg0 : (!fir.box>) -> i32 return } -// CHECK-LABEL: func.func @sum_non_const_dim( -// CHECK-SAME: %[[VAL_0:.*]]: !fir.box>, -// CHECK-SAME: %[[VAL_1:.*]]: i32) { -// CHECK: %[[VAL_2:.*]] = hlfir.sum %[[VAL_0]] dim %[[VAL_1]] : (!fir.box>, i32) -> i32 +// CHECK-LABEL: func.func @sum_total_2d_reduction( +// CHECK-SAME: %[[VAL_0:.*]]: !fir.box>) { +// CHECK: %[[VAL_1:.*]] = arith.constant 0 : index +// CHECK: %[[VAL_2:.*]]:3 = fir.box_dims %[[VAL_0]], %[[VAL_1]] : (!fir.box>, index) -> (index, index, index) +// CHECK: %[[VAL_3:.*]] = arith.constant 3 : index +// CHECK: %[[VAL_4:.*]] = arith.constant 0 : i32 +// CHECK: %[[VAL_5:.*]] = arith.constant 1 : index +// CHECK: %[[VAL_6:.*]] = fir.do_loop %[[VAL_7:.*]] = %[[VAL_5]] to %[[VAL_3]] step %[[VAL_5]] unordered iter_args(%[[VAL_8:.*]] = %[[VAL_4]]) -> (i32) { +// CHECK: %[[VAL_9:.*]] = fir.do_loop %[[VAL_10:.*]] = %[[VAL_5]] to %[[VAL_2]]#1 step %[[VAL_5]] unordered iter_args(%[[VAL_11:.*]] = %[[VAL_8]]) -> (i32) { +// CHECK: %[[VAL_12:.*]] = arith.constant 0 : index +// CHECK: %[[VAL_13:.*]]:3 = fir.box_dims %[[VAL_0]], %[[VAL_12]] : (!fir.box>, index) -> (index, index, index) +// CHECK: %[[VAL_14:.*]] = arith.constant 1 : index +// CHECK: %[[VAL_15:.*]]:3 = fir.box_dims %[[VAL_0]], %[[VAL_14]] : (!fir.box>, index) -> (index, index, index) +// CHECK: %[[VAL_16:.*]] = arith.constant 1 : index +// CHECK: %[[VAL_17:.*]] = arith.subi %[[VAL_13]]#0, %[[VAL_16]] : index +// CHECK: %[[VAL_18:.*]] = arith.addi %[[VAL_10]], %[[VAL_17]] : index +// CHECK: %[[VAL_19:.*]] = arith.subi %[[VAL_15]]#0, %[[VAL_16]] : index +// CHECK: %[[VAL_20:.*]] = arith.addi %[[VAL_7]], %[[VAL_19]] : index +// CHECK: %[[VAL_21:.*]] = hlfir.designate %[[VAL_0]] (%[[VAL_18]], %[[VAL_20]]) : (!fir.box>, index, index) -> !fir.ref +// CHECK: %[[VAL_22:.*]] = fir.load %[[VAL_21]] : !fir.ref +// CHECK: %[[VAL_23:.*]] = arith.addi %[[VAL_11]], %[[VAL_22]] : i32 +// CHECK: fir.result %[[VAL_23]] : i32 +// CHECK: } +// CHECK: fir.result %[[VAL_9]] : i32 +// CHECK: } // CHECK: return // CHECK: }