diff --git a/flang/lib/Optimizer/HLFIR/Transforms/OptimizedBufferization.cpp b/flang/lib/Optimizer/HLFIR/Transforms/OptimizedBufferization.cpp index ef6aabbceacb7..8342458e00763 100644 --- a/flang/lib/Optimizer/HLFIR/Transforms/OptimizedBufferization.cpp +++ b/flang/lib/Optimizer/HLFIR/Transforms/OptimizedBufferization.cpp @@ -159,28 +159,162 @@ containsReadOrWriteEffectOn(const mlir::MemoryEffects::EffectInstance &effect, return mlir::AliasResult::NoAlias; } -// Returns true if the given array references represent identical -// or completely disjoint array slices. The callers may use this -// method when the alias analysis reports an alias of some kind, -// so that we can run Fortran specific analysis on the array slices -// to see if they are identical or disjoint. Note that the alias -// analysis are not able to give such an answer about the references. -static bool areIdenticalOrDisjointSlices(mlir::Value ref1, mlir::Value ref2) { +// Helper class for analyzing two array slices represented +// by two hlfir.designate operations. +class ArraySectionAnalyzer { +public: + // The result of the analysis is one of the values below. + enum class SlicesOverlapKind { + // Slices overlap is unknown. + Unknown, + // Slices are definitely identical. + DefinitelyIdentical, + // Slices are definitely disjoint. + DefinitelyDisjoint, + // Slices may be either disjoint or identical, + // i.e. there is definitely no partial overlap. + EitherIdenticalOrDisjoint + }; + + // Analyzes two hlfir.designate results and returns the overlap kind. + // The callers may use this method when the alias analysis reports + // an alias of some kind, so that we can run Fortran specific analysis + // on the array slices to see if they are identical or disjoint. + // Note that the alias analysis is not able to give such an answer + // about the references. 
+ static SlicesOverlapKind analyze(mlir::Value ref1, mlir::Value ref2); + +private: + struct SectionDesc { + // An array section is described by a (lb, ub, stride) tuple. + // If the designator's subscript is not a triplet, then + // the section descriptor is constructed as (lb, nullptr, nullptr). + mlir::Value lb, ub, stride; + + SectionDesc(mlir::Value lb, mlir::Value ub, mlir::Value stride) + : lb(lb), ub(ub), stride(stride) { + assert(lb && "lower bound or index must be specified"); + normalize(); + } + + // Normalize the section descriptor: + // 1. If UB is nullptr, then it is set to LB. + // 2. If LB==UB, then stride does not matter, + // so it is reset to nullptr. + // 3. If STRIDE==1, then it is reset to nullptr. + void normalize() { + if (!ub) + ub = lb; + if (lb == ub) + stride = nullptr; + if (stride) + if (auto val = fir::getIntIfConstant(stride)) + if (*val == 1) + stride = nullptr; + } + + bool operator==(const SectionDesc &other) const { + return lb == other.lb && ub == other.ub && stride == other.stride; + } + }; + + // Given an operand_iterator over the indices operands, + // read the subscript values and return them as SectionDesc + // updating the iterator. If isTriplet is true, + // the subscript is a triplet, and the result is (lb, ub, stride). + // Otherwise, the subscript is a scalar index, and the result + // is (index, nullptr, nullptr). + static SectionDesc readSectionDesc(mlir::Operation::operand_iterator &it, + bool isTriplet) { + if (isTriplet) + return {*it++, *it++, *it++}; + return {*it++, nullptr, nullptr}; + } + + // Return the ordered lower and upper bounds of the section. + // If stride is known to be non-negative, then the ordered + // bounds match the (lb, ub) of the descriptor. + // If stride is known to be negative, then the ordered + // bounds are (ub, lb) of the descriptor. + // If stride is unknown, we cannot deduce any order, + // so the result is (nullptr, nullptr). + static std::pair + getOrderedBounds(const SectionDesc &desc) { + mlir::Value stride = desc.stride; + // Null stride means stride=1. 
+ if (!stride) + return {desc.lb, desc.ub}; + // Reverse the bounds, if stride is negative. + if (auto val = fir::getIntIfConstant(stride)) { + if (*val >= 0) + return {desc.lb, desc.ub}; + else + return {desc.ub, desc.lb}; + } + + return {nullptr, nullptr}; + } + + // Given two array sections (lb1, ub1, stride1) and + // (lb2, ub2, stride2), return true only if the sections + // are known to be disjoint. + // + // For example, for any positive constant C: + // X:Y does not overlap with (Y+C):Z + // X:Y does not overlap with Z:(X-C) + static bool areDisjointSections(const SectionDesc &desc1, + const SectionDesc &desc2) { + auto [lb1, ub1] = getOrderedBounds(desc1); + auto [lb2, ub2] = getOrderedBounds(desc2); + if (!lb1 || !lb2) + return false; + // Note that this comparison must be made on the ordered bounds, + // otherwise 'a(x:y:1) = a(z:x-1:-1) + 1' may be incorrectly treated + // as not overlapping (x=2, y=10, z=9). + if (isLess(ub1, lb2) || isLess(ub2, lb1)) + return true; + return false; + } + + // Given two array sections (lb1, ub1, stride1) and + // (lb2, ub2, stride2), return true only if the sections + // are known to be identical. + // + // For example: + // (x, x, stride) + // (x, nullptr, nullptr) + // + // These sections are identical, from the point of view of which array + // elements are being addressed, even though the shape + // of the array slices might be different. + static bool areIdenticalSections(const SectionDesc &desc1, + const SectionDesc &desc2) { + if (desc1 == desc2) + return true; + return false; + } + + // Return true, if v1 is known to be less than v2. + static bool isLess(mlir::Value v1, mlir::Value v2); +}; + +ArraySectionAnalyzer::SlicesOverlapKind +ArraySectionAnalyzer::analyze(mlir::Value ref1, mlir::Value ref2) { if (ref1 == ref2) - return true; + return SlicesOverlapKind::DefinitelyIdentical; auto des1 = ref1.getDefiningOp(); auto des2 = ref2.getDefiningOp(); // We only support a pair of designators right now. 
if (!des1 || !des2) - return false; + return SlicesOverlapKind::Unknown; if (des1.getMemref() != des2.getMemref()) { // If the bases are different, then there is unknown overlap. LLVM_DEBUG(llvm::dbgs() << "No identical base for:\n" << des1 << "and:\n" << des2 << "\n"); - return false; + return SlicesOverlapKind::Unknown; } // Require all components of the designators to be the same. @@ -194,104 +328,105 @@ static bool areIdenticalOrDisjointSlices(mlir::Value ref1, mlir::Value ref2) { LLVM_DEBUG(llvm::dbgs() << "Different designator specs for:\n" << des1 << "and:\n" << des2 << "\n"); - return false; - } - - if (des1.getIsTriplet() != des2.getIsTriplet()) { - LLVM_DEBUG(llvm::dbgs() << "Different sections for:\n" - << des1 << "and:\n" - << des2 << "\n"); - return false; + return SlicesOverlapKind::Unknown; } // Analyze the subscripts. - // For example: - // hlfir.designate %6#0 (%c2:%c7999:%c1, %c1:%c120:%c1, %0) shape %9 - // hlfir.designate %6#0 (%c2:%c7999:%c1, %c1:%c120:%c1, %1) shape %9 - // - // If all the triplets (section speficiers) are the same, then - // we do not care if %0 is equal to %1 - the slices are either - // identical or completely disjoint. auto des1It = des1.getIndices().begin(); auto des2It = des2.getIndices().begin(); bool identicalTriplets = true; - for (bool isTriplet : des1.getIsTriplet()) { - if (isTriplet) { - for (int i = 0; i < 3; ++i) - if (*des1It++ != *des2It++) { - LLVM_DEBUG(llvm::dbgs() << "Triplet mismatch for:\n" - << des1 << "and:\n" - << des2 << "\n"); - identicalTriplets = false; - break; - } - } else { - ++des1It; - ++des2It; + bool identicalIndices = true; + for (auto [isTriplet1, isTriplet2] : + llvm::zip(des1.getIsTriplet(), des2.getIsTriplet())) { + SectionDesc desc1 = readSectionDesc(des1It, isTriplet1); + SectionDesc desc2 = readSectionDesc(des2It, isTriplet2); + + // See if we can prove that any of the sections do not overlap. 
+ // This is mostly a Polyhedron/nf performance hack that looks for + // particular relations between the lower and upper bounds + // of the array sections, e.g. for any positive constant C: + // X:Y does not overlap with (Y+C):Z + // X:Y does not overlap with Z:(X-C) + if (areDisjointSections(desc1, desc2)) + return SlicesOverlapKind::DefinitelyDisjoint; + + if (!areIdenticalSections(desc1, desc2)) { + if (isTriplet1 || isTriplet2) { + // For example: + // hlfir.designate %6#0 (%c2:%c7999:%c1, %c1:%c120:%c1, %0) + // hlfir.designate %6#0 (%c2:%c7999:%c1, %c1:%c120:%c1, %1) + // + // If all the triplets (section specifiers) are the same, then + // we do not care if %0 is equal to %1 - the slices are either + // identical or completely disjoint. + // + // Also, treat these as identical sections: + // hlfir.designate %6#0 (%c2:%c2:%c1) + // hlfir.designate %6#0 (%c2) + identicalTriplets = false; + LLVM_DEBUG(llvm::dbgs() << "Triplet mismatch for:\n" + << des1 << "and:\n" + << des2 << "\n"); + } else { + identicalIndices = false; + LLVM_DEBUG(llvm::dbgs() << "Indices mismatch for:\n" + << des1 << "and:\n" + << des2 << "\n"); + } + } } - if (identicalTriplets) - return true; - // See if we can prove that any of the triplets do not overlap. - // This is mostly a Polyhedron/nf performance hack that looks for - // particular relations between the lower and upper bounds - // of the array sections, e.g. 
for any positive constant C: - // X:Y does not overlap with (Y+C):Z - // X:Y does not overlap with Z:(X-C) - auto displacedByConstant = [](mlir::Value v1, mlir::Value v2) { - auto removeConvert = [](mlir::Value v) -> mlir::Operation * { - auto *op = v.getDefiningOp(); - while (auto conv = mlir::dyn_cast_or_null(op)) - op = conv.getValue().getDefiningOp(); - return op; - }; + if (identicalTriplets) { + if (identicalIndices) + return SlicesOverlapKind::DefinitelyIdentical; + else + return SlicesOverlapKind::EitherIdenticalOrDisjoint; + } - auto isPositiveConstant = [](mlir::Value v) -> bool { - if (auto conOp = - mlir::dyn_cast(v.getDefiningOp())) - if (auto iattr = mlir::dyn_cast(conOp.getValue())) - return iattr.getInt() > 0; - return false; - }; + LLVM_DEBUG(llvm::dbgs() << "Different sections for:\n" + << des1 << "and:\n" + << des2 << "\n"); + return SlicesOverlapKind::Unknown; +} - auto *op1 = removeConvert(v1); - auto *op2 = removeConvert(v2); - if (!op1 || !op2) - return false; - if (auto addi = mlir::dyn_cast(op2)) - if ((addi.getLhs().getDefiningOp() == op1 && - isPositiveConstant(addi.getRhs())) || - (addi.getRhs().getDefiningOp() == op1 && - isPositiveConstant(addi.getLhs()))) - return true; - if (auto subi = mlir::dyn_cast(op1)) - if (subi.getLhs().getDefiningOp() == op2 && - isPositiveConstant(subi.getRhs())) - return true; +bool ArraySectionAnalyzer::isLess(mlir::Value v1, mlir::Value v2) { + auto removeConvert = [](mlir::Value v) -> mlir::Operation * { + auto *op = v.getDefiningOp(); + while (auto conv = mlir::dyn_cast_or_null(op)) + op = conv.getValue().getDefiningOp(); + return op; + }; + + auto isPositiveConstant = [](mlir::Value v) -> bool { + if (auto val = fir::getIntIfConstant(v)) + return *val > 0; return false; }; - des1It = des1.getIndices().begin(); - des2It = des2.getIndices().begin(); - for (bool isTriplet : des1.getIsTriplet()) { - if (isTriplet) { - mlir::Value des1Lb = *des1It++; - mlir::Value des1Ub = *des1It++; - mlir::Value des2Lb = 
*des2It++; - mlir::Value des2Ub = *des2It++; - // Ignore strides. - ++des1It; - ++des2It; - if (displacedByConstant(des1Ub, des2Lb) || - displacedByConstant(des2Ub, des1Lb)) - return true; - } else { - ++des1It; - ++des2It; - } - } + auto *op1 = removeConvert(v1); + auto *op2 = removeConvert(v2); + if (!op1 || !op2) + return false; + // Check if they are both constants. + if (auto val1 = fir::getIntIfConstant(op1->getResult(0))) + if (auto val2 = fir::getIntIfConstant(op2->getResult(0))) + return *val1 < *val2; + + // Handle some variable cases (C > 0): + // v2 = v1 + C + // v2 = C + v1 + // v1 = v2 - C + if (auto addi = mlir::dyn_cast(op2)) + if ((addi.getLhs().getDefiningOp() == op1 && + isPositiveConstant(addi.getRhs())) || + (addi.getRhs().getDefiningOp() == op1 && + isPositiveConstant(addi.getLhs()))) + return true; + if (auto subi = mlir::dyn_cast(op1)) + if (subi.getLhs().getDefiningOp() == op2 && + isPositiveConstant(subi.getRhs())) + return true; return false; } @@ -405,21 +540,27 @@ ElementalAssignBufferization::findMatch(hlfir::ElementalOp elemental) { if (!res.isPartial()) { if (auto designate = effect.getValue().getDefiningOp()) { - if (!areIdenticalOrDisjointSlices(match.array, designate.getMemref())) { + ArraySectionAnalyzer::SlicesOverlapKind overlap = + ArraySectionAnalyzer::analyze(match.array, designate.getMemref()); + if (overlap == + ArraySectionAnalyzer::SlicesOverlapKind::DefinitelyDisjoint) + continue; + + if (overlap == ArraySectionAnalyzer::SlicesOverlapKind::Unknown) { LLVM_DEBUG(llvm::dbgs() << "possible read conflict: " << designate << " at " << elemental.getLoc() << "\n"); return std::nullopt; } auto indices = designate.getIndices(); auto elementalIndices = elemental.getIndices(); - if (indices.size() != elementalIndices.size()) { - LLVM_DEBUG(llvm::dbgs() << "possible read conflict: " << designate - << " at " << elemental.getLoc() << "\n"); - return std::nullopt; - } - if (std::equal(indices.begin(), indices.end(), 
elementalIndices.begin(), + if (indices.size() == elementalIndices.size() && + std::equal(indices.begin(), indices.end(), elementalIndices.begin(), elementalIndices.end())) continue; + + LLVM_DEBUG(llvm::dbgs() << "possible read conflict: " << designate + << " at " << elemental.getLoc() << "\n"); + return std::nullopt; } } LLVM_DEBUG(llvm::dbgs() << "disallowed side-effect: " << effect.getValue() diff --git a/flang/test/HLFIR/opt-array-slice-assign.fir b/flang/test/HLFIR/opt-array-slice-assign.fir index 11bd97c115834..3db47b1da8cd3 100644 --- a/flang/test/HLFIR/opt-array-slice-assign.fir +++ b/flang/test/HLFIR/opt-array-slice-assign.fir @@ -382,3 +382,427 @@ func.func @_QPtest6(%arg0: !fir.ref> {fir.bindc_name = "x"}, } // CHECK-LABEL: func.func @_QPtest6( // CHECK-NOT: hlfir.elemental + +// Check that 'x(9,:)=SUM(x(1:8,:),DIM=1)' is optimized +// due to the LHS and RHS being disjoint array sections. +func.func @test_disjoint_triple_index(%arg0: !fir.box> {fir.bindc_name = "x"}) { + %cst = arith.constant 0.000000e+00 : f32 + %c9 = arith.constant 9 : index + %c0 = arith.constant 0 : index + %c8 = arith.constant 8 : index + %c1 = arith.constant 1 : index + %0 = fir.dummy_scope : !fir.dscope + %1:2 = hlfir.declare %arg0 dummy_scope %0 {uniq_name = "_QFtestEx"} : (!fir.box>, !fir.dscope) -> (!fir.box>, !fir.box>) + %2:3 = fir.box_dims %1#1, %c1 : (!fir.box>, index) -> (index, index, index) + %3 = arith.cmpi sgt, %2#1, %c0 : index + %4 = arith.select %3, %2#1, %c0 : index + %5 = fir.shape %c8, %4 : (index, index) -> !fir.shape<2> + %6 = hlfir.designate %1#0 (%c1:%c8:%c1, %c1:%2#1:%c1) shape %5 : (!fir.box>, index, index, index, index, index, index, !fir.shape<2>) -> !fir.box> + %7 = fir.shape %4 : (index) -> !fir.shape<1> + %8 = hlfir.elemental %7 unordered : (!fir.shape<1>) -> !hlfir.expr { + ^bb0(%arg1: index): + %10 = fir.alloca f32 {bindc_name = ".sum.reduction"} + fir.store %cst to %10 : !fir.ref + fir.do_loop %arg2 = %c1 to %c8 step %c1 unordered { + %12 = 
fir.load %10 : !fir.ref + %13 = hlfir.designate %6 (%arg2, %arg1) : (!fir.box>, index, index) -> !fir.ref + %14 = fir.load %13 : !fir.ref + %15 = arith.addf %12, %14 fastmath : f32 + fir.store %15 to %10 : !fir.ref + } + %11 = fir.load %10 : !fir.ref + hlfir.yield_element %11 : f32 + } + %9 = hlfir.designate %1#0 (%c9, %c1:%2#1:%c1) shape %7 : (!fir.box>, index, index, index, index, !fir.shape<1>) -> !fir.box> + hlfir.assign %8 to %9 : !hlfir.expr, !fir.box> + hlfir.destroy %8 : !hlfir.expr + return +} +// CHECK-LABEL: func.func @test_disjoint_triple_index( +// CHECK-NOT: hlfir.elemental + +// Check that 'x(9,:)=SUM(x(9:9,:),DIM=1)' is not optimized. +func.func @test_overlapping_triple_index(%arg0: !fir.box> {fir.bindc_name = "x"}) { + %cst = arith.constant 0.000000e+00 : f32 + %c9 = arith.constant 9 : index + %c0 = arith.constant 0 : index + %c8 = arith.constant 8 : index + %c1 = arith.constant 1 : index + %0 = fir.dummy_scope : !fir.dscope + %1:2 = hlfir.declare %arg0 dummy_scope %0 {uniq_name = "_QFtestEx"} : (!fir.box>, !fir.dscope) -> (!fir.box>, !fir.box>) + %2:3 = fir.box_dims %1#1, %c1 : (!fir.box>, index) -> (index, index, index) + %3 = arith.cmpi sgt, %2#1, %c0 : index + %4 = arith.select %3, %2#1, %c0 : index + %5 = fir.shape %c8, %4 : (index, index) -> !fir.shape<2> + %6 = hlfir.designate %1#0 (%c9:%c9:%c1, %c1:%2#1:%c1) shape %5 : (!fir.box>, index, index, index, index, index, index, !fir.shape<2>) -> !fir.box> + %7 = fir.shape %4 : (index) -> !fir.shape<1> + %8 = hlfir.elemental %7 unordered : (!fir.shape<1>) -> !hlfir.expr { + ^bb0(%arg1: index): + %10 = fir.alloca f32 {bindc_name = ".sum.reduction"} + fir.store %cst to %10 : !fir.ref + fir.do_loop %arg2 = %c1 to %c8 step %c1 unordered { + %12 = fir.load %10 : !fir.ref + %13 = hlfir.designate %6 (%arg2, %arg1) : (!fir.box>, index, index) -> !fir.ref + %14 = fir.load %13 : !fir.ref + %15 = arith.addf %12, %14 fastmath : f32 + fir.store %15 to %10 : !fir.ref + } + %11 = fir.load %10 : !fir.ref + 
hlfir.yield_element %11 : f32 + } + %9 = hlfir.designate %1#0 (%c9, %c1:%2#1:%c1) shape %7 : (!fir.box>, index, index, index, index, !fir.shape<1>) -> !fir.box> + hlfir.assign %8 to %9 : !hlfir.expr, !fir.box> + hlfir.destroy %8 : !hlfir.expr + return +} +// CHECK-LABEL: func.func @test_overlapping_triple_index( +// CHECK: hlfir.elemental + +// Check that 'x(9:ub) = x(lb:6) + 1' is optimized, +// even though the lb and ub are unknown. +func.func @test_disjoint_unknown_bounds(%arg0: !fir.box> {fir.bindc_name = "x"}, %arg1: !fir.ref {fir.bindc_name = "lb"}, %arg2: !fir.ref {fir.bindc_name = "ub"}) { + %c-8 = arith.constant -8 : index + %c7 = arith.constant 7 : index + %c9 = arith.constant 9 : index + %cst = arith.constant 1.000000e+00 : f32 + %c0 = arith.constant 0 : index + %c1 = arith.constant 1 : index + %c6 = arith.constant 6 : index + %0 = fir.dummy_scope : !fir.dscope + %1:2 = hlfir.declare %arg1 dummy_scope %0 {uniq_name = "_QFtestElb"} : (!fir.ref, !fir.dscope) -> (!fir.ref, !fir.ref) + %2:2 = hlfir.declare %arg2 dummy_scope %0 {uniq_name = "_QFtestEub"} : (!fir.ref, !fir.dscope) -> (!fir.ref, !fir.ref) + %3:2 = hlfir.declare %arg0 dummy_scope %0 {uniq_name = "_QFtestEx"} : (!fir.box>, !fir.dscope) -> (!fir.box>, !fir.box>) + %4 = fir.load %1#0 : !fir.ref + %5 = fir.convert %4 : (i32) -> index + %6 = arith.subi %c7, %5 : index + %7 = arith.cmpi sgt, %6, %c0 : index + %8 = arith.select %7, %6, %c0 : index + %9 = fir.shape %8 : (index) -> !fir.shape<1> + %10 = hlfir.designate %3#0 (%5:%c6:%c1) shape %9 : (!fir.box>, index, index, index, !fir.shape<1>) -> !fir.box> + %11 = hlfir.elemental %9 unordered : (!fir.shape<1>) -> !hlfir.expr { + ^bb0(%arg3: index): + %19 = hlfir.designate %10 (%arg3) : (!fir.box>, index) -> !fir.ref + %20 = fir.load %19 : !fir.ref + %21 = arith.addf %20, %cst fastmath : f32 + hlfir.yield_element %21 : f32 + } + %12 = fir.load %2#0 : !fir.ref + %13 = fir.convert %12 : (i32) -> index + %14 = arith.addi %13, %c-8 : index + %15 = arith.cmpi 
sgt, %14, %c0 : index + %16 = arith.select %15, %14, %c0 : index + %17 = fir.shape %16 : (index) -> !fir.shape<1> + %18 = hlfir.designate %3#0 (%c9:%13:%c1) shape %17 : (!fir.box>, index, index, index, !fir.shape<1>) -> !fir.box> + hlfir.assign %11 to %18 : !hlfir.expr, !fir.box> + hlfir.destroy %11 : !hlfir.expr + return +} +// CHECK-LABEL: func.func @test_disjoint_unknown_bounds( +// CHECK-NOT: hlfir.elemental + +// Check that 'x(lb1:14) = x(lb2:15:-1) + 1' is optimized, +// even though lb1 and lb2 are unknown. +func.func @test_disjoint_unknown_bounds_negative_stride(%arg0: !fir.box> {fir.bindc_name = "x"}, %arg1: !fir.ref {fir.bindc_name = "lb1"}, %arg2: !fir.ref {fir.bindc_name = "lb2"}) { + %c1 = arith.constant 1 : index + %c14 = arith.constant 14 : index + %cst = arith.constant 1.000000e+00 : f32 + %c0 = arith.constant 0 : index + %c-1 = arith.constant -1 : index + %c15 = arith.constant 15 : index + %0 = fir.dummy_scope : !fir.dscope + %1:2 = hlfir.declare %arg1 dummy_scope %0 {uniq_name = "_QFtestElb1"} : (!fir.ref, !fir.dscope) -> (!fir.ref, !fir.ref) + %2:2 = hlfir.declare %arg2 dummy_scope %0 {uniq_name = "_QFtestElb2"} : (!fir.ref, !fir.dscope) -> (!fir.ref, !fir.ref) + %3:2 = hlfir.declare %arg0 dummy_scope %0 {uniq_name = "_QFtestEx"} : (!fir.box>, !fir.dscope) -> (!fir.box>, !fir.box>) + %4 = fir.load %2#0 : !fir.ref + %5 = fir.convert %4 : (i32) -> index + %6 = arith.subi %c14, %5 : index + %7 = arith.divsi %6, %c-1 : index + %8 = arith.cmpi sgt, %7, %c0 : index + %9 = arith.select %8, %7, %c0 : index + %10 = fir.shape %9 : (index) -> !fir.shape<1> + %11 = hlfir.designate %3#0 (%5:%c15:%c-1) shape %10 : (!fir.box>, index, index, index, !fir.shape<1>) -> !fir.box> + %12 = hlfir.elemental %10 unordered : (!fir.shape<1>) -> !hlfir.expr { + ^bb0(%arg3: index): + %20 = hlfir.designate %11 (%arg3) : (!fir.box>, index) -> !fir.ref + %21 = fir.load %20 : !fir.ref + %22 = arith.addf %21, %cst fastmath : f32 + hlfir.yield_element %22 : f32 + } + %13 = fir.load 
%1#0 : !fir.ref + %14 = fir.convert %13 : (i32) -> index + %15 = arith.subi %c15, %14 : index + %16 = arith.cmpi sgt, %15, %c0 : index + %17 = arith.select %16, %15, %c0 : index + %18 = fir.shape %17 : (index) -> !fir.shape<1> + %19 = hlfir.designate %3#0 (%14:%c14:%c1) shape %18 : (!fir.box>, index, index, index, !fir.shape<1>) -> !fir.box> + hlfir.assign %12 to %19 : !hlfir.expr, !fir.box> + hlfir.destroy %12 : !hlfir.expr + return +} +// CHECK-LABEL: func.func @test_disjoint_unknown_bounds_negative_stride( +// CHECK-NOT: hlfir.elemental + +// Check that 'x(1:5) = x(5:1:-1) + 1' is not optimized. +func.func @test_overlap_known_triplets_negative_stride(%arg0: !fir.box> {fir.bindc_name = "x"}) { + %cst = arith.constant 1.000000e+00 : f32 + %c-1 = arith.constant -1 : index + %c1 = arith.constant 1 : index + %c5 = arith.constant 5 : index + %0 = fir.dummy_scope : !fir.dscope + %1:2 = hlfir.declare %arg0 dummy_scope %0 {uniq_name = "_QFtestEx"} : (!fir.box>, !fir.dscope) -> (!fir.box>, !fir.box>) + %2 = fir.shape %c5 : (index) -> !fir.shape<1> + %3 = hlfir.designate %1#0 (%c5:%c1:%c-1) shape %2 : (!fir.box>, index, index, index, !fir.shape<1>) -> !fir.box> + %4 = hlfir.elemental %2 unordered : (!fir.shape<1>) -> !hlfir.expr<5xf32> { + ^bb0(%arg1: index): + %6 = hlfir.designate %3 (%arg1) : (!fir.box>, index) -> !fir.ref + %7 = fir.load %6 : !fir.ref + %8 = arith.addf %7, %cst fastmath : f32 + hlfir.yield_element %8 : f32 + } + %5 = hlfir.designate %1#0 (%c1:%c5:%c1) shape %2 : (!fir.box>, index, index, index, !fir.shape<1>) -> !fir.box> + hlfir.assign %4 to %5 : !hlfir.expr<5xf32>, !fir.box> + hlfir.destroy %4 : !hlfir.expr<5xf32> + return +} +// CHECK-LABEL: func.func @test_overlap_known_triplets_negative_stride( +// CHECK: hlfir.elemental + +// Check that 'x(1:5) = x(6:ub:-1) + 1' is not optimized. 
+func.func @test_overlap_unknown_bound_negative_stride(%arg0: !fir.box> {fir.bindc_name = "x"}, %arg1: !fir.ref {fir.bindc_name = "ub"}) { + %c-7 = arith.constant -7 : index + %c5 = arith.constant 5 : index + %c1 = arith.constant 1 : index + %cst = arith.constant 1.000000e+00 : f32 + %c0 = arith.constant 0 : index + %c-1 = arith.constant -1 : index + %c6 = arith.constant 6 : index + %0 = fir.dummy_scope : !fir.dscope + %1:2 = hlfir.declare %arg1 dummy_scope %0 {uniq_name = "_QFtestEub"} : (!fir.ref, !fir.dscope) -> (!fir.ref, !fir.ref) + %2:2 = hlfir.declare %arg0 dummy_scope %0 {uniq_name = "_QFtestEx"} : (!fir.box>, !fir.dscope) -> (!fir.box>, !fir.box>) + %3 = fir.load %1#0 : !fir.ref + %4 = fir.convert %3 : (i32) -> index + %5 = arith.addi %4, %c-7 : index + %6 = arith.divsi %5, %c-1 : index + %7 = arith.cmpi sgt, %6, %c0 : index + %8 = arith.select %7, %6, %c0 : index + %9 = fir.shape %8 : (index) -> !fir.shape<1> + %10 = hlfir.designate %2#0 (%c6:%4:%c-1) shape %9 : (!fir.box>, index, index, index, !fir.shape<1>) -> !fir.box> + %11 = hlfir.elemental %9 unordered : (!fir.shape<1>) -> !hlfir.expr { + ^bb0(%arg2: index): + %14 = hlfir.designate %10 (%arg2) : (!fir.box>, index) -> !fir.ref + %15 = fir.load %14 : !fir.ref + %16 = arith.addf %15, %cst fastmath : f32 + hlfir.yield_element %16 : f32 + } + %12 = fir.shape %c5 : (index) -> !fir.shape<1> + %13 = hlfir.designate %2#0 (%c1:%c5:%c1) shape %12 : (!fir.box>, index, index, index, !fir.shape<1>) -> !fir.box> + hlfir.assign %11 to %13 : !hlfir.expr, !fir.box> + hlfir.destroy %11 : !hlfir.expr + return +} +// CHECK-LABEL: func.func @test_overlap_unknown_bound_negative_stride( +// CHECK: hlfir.elemental + +// Check that 'x(1:5) = x(6:ub:stride) + 1' is not optimized. 
+func.func @test_overlap_unknown_bound_and_stride(%arg0: !fir.box> {fir.bindc_name = "x"}, %arg1: !fir.ref {fir.bindc_name = "ub"}, %arg2: !fir.ref {fir.bindc_name = "stride"}) { + %c5 = arith.constant 5 : index + %c1 = arith.constant 1 : index + %cst = arith.constant 1.000000e+00 : f32 + %c0 = arith.constant 0 : index + %c6 = arith.constant 6 : index + %0 = fir.dummy_scope : !fir.dscope + %1:2 = hlfir.declare %arg2 dummy_scope %0 {uniq_name = "_QFtestEstride"} : (!fir.ref, !fir.dscope) -> (!fir.ref, !fir.ref) + %2:2 = hlfir.declare %arg1 dummy_scope %0 {uniq_name = "_QFtestEub"} : (!fir.ref, !fir.dscope) -> (!fir.ref, !fir.ref) + %3:2 = hlfir.declare %arg0 dummy_scope %0 {uniq_name = "_QFtestEx"} : (!fir.box>, !fir.dscope) -> (!fir.box>, !fir.box>) + %4 = fir.load %2#0 : !fir.ref + %5 = fir.convert %4 : (i32) -> index + %6 = fir.load %1#0 : !fir.ref + %7 = fir.convert %6 : (i32) -> index + %8 = arith.subi %5, %c6 : index + %9 = arith.addi %8, %7 : index + %10 = arith.divsi %9, %7 : index + %11 = arith.cmpi sgt, %10, %c0 : index + %12 = arith.select %11, %10, %c0 : index + %13 = fir.shape %12 : (index) -> !fir.shape<1> + %14 = hlfir.designate %3#0 (%c6:%5:%7) shape %13 : (!fir.box>, index, index, index, !fir.shape<1>) -> !fir.box> + %15 = hlfir.elemental %13 unordered : (!fir.shape<1>) -> !hlfir.expr { + ^bb0(%arg3: index): + %18 = hlfir.designate %14 (%arg3) : (!fir.box>, index) -> !fir.ref + %19 = fir.load %18 : !fir.ref + %20 = arith.addf %19, %cst fastmath : f32 + hlfir.yield_element %20 : f32 + } + %16 = fir.shape %c5 : (index) -> !fir.shape<1> + %17 = hlfir.designate %3#0 (%c1:%c5:%c1) shape %16 : (!fir.box>, index, index, index, !fir.shape<1>) -> !fir.box> + hlfir.assign %15 to %17 : !hlfir.expr, !fir.box> + hlfir.destroy %15 : !hlfir.expr + return +} +// CHECK-LABEL: func.func @test_overlap_unknown_bound_and_stride( +// CHECK: hlfir.elemental + +// Check that 'a(2:2:s1) = a(2:2:s2) + 1' is optimized, +// even though the strides are unknown. 
+func.func @test_identical_1element_unknown_strides(%arg0: !fir.box> {fir.bindc_name = "a"}, %arg1: !fir.ref {fir.bindc_name = "s1"}, %arg2: !fir.ref {fir.bindc_name = "s2"}) { + %c1_i32 = arith.constant 1 : i32 + %c0 = arith.constant 0 : index + %c2 = arith.constant 2 : index + %0 = fir.dummy_scope : !fir.dscope + %1:2 = hlfir.declare %arg0 dummy_scope %0 {uniq_name = "_QFtestEa"} : (!fir.box>, !fir.dscope) -> (!fir.box>, !fir.box>) + %2:2 = hlfir.declare %arg1 dummy_scope %0 {uniq_name = "_QFtestEs1"} : (!fir.ref, !fir.dscope) -> (!fir.ref, !fir.ref) + %3:2 = hlfir.declare %arg2 dummy_scope %0 {uniq_name = "_QFtestEs2"} : (!fir.ref, !fir.dscope) -> (!fir.ref, !fir.ref) + %4 = fir.load %3#0 : !fir.ref + %5 = fir.convert %4 : (i32) -> index + %6 = arith.divsi %5, %5 : index + %7 = arith.cmpi sgt, %6, %c0 : index + %8 = arith.select %7, %6, %c0 : index + %9 = fir.shape %8 : (index) -> !fir.shape<1> + %10 = hlfir.designate %1#0 (%c2:%c2:%5) shape %9 : (!fir.box>, index, index, index, !fir.shape<1>) -> !fir.box> + %11 = hlfir.elemental %9 unordered : (!fir.shape<1>) -> !hlfir.expr { + ^bb0(%arg3: index): + %19 = hlfir.designate %10 (%arg3) : (!fir.box>, index) -> !fir.ref + %20 = fir.load %19 : !fir.ref + %21 = arith.addi %20, %c1_i32 : i32 + hlfir.yield_element %21 : i32 + } + %12 = fir.load %2#0 : !fir.ref + %13 = fir.convert %12 : (i32) -> index + %14 = arith.divsi %13, %13 : index + %15 = arith.cmpi sgt, %14, %c0 : index + %16 = arith.select %15, %14, %c0 : index + %17 = fir.shape %16 : (index) -> !fir.shape<1> + %18 = hlfir.designate %1#0 (%c2:%c2:%13) shape %17 : (!fir.box>, index, index, index, !fir.shape<1>) -> !fir.box> + hlfir.assign %11 to %18 : !hlfir.expr, !fir.box> + hlfir.destroy %11 : !hlfir.expr + return +} +// CHECK-LABEL: func.func @test_identical_1element_unknown_strides( +// CHECK-NOT: hlfir.elemental + +func.func @test_disjoint_1element_unknown_strides(%arg0: !fir.box> {fir.bindc_name = "a"}, %arg1: !fir.ref {fir.bindc_name = "s1"}, %arg2: 
!fir.ref {fir.bindc_name = "s2"}) { + %c2 = arith.constant 2 : index + %c1_i32 = arith.constant 1 : i32 + %c0 = arith.constant 0 : index + %c3 = arith.constant 3 : index + %0 = fir.dummy_scope : !fir.dscope + %1:2 = hlfir.declare %arg0 dummy_scope %0 {uniq_name = "_QFtestEa"} : (!fir.box>, !fir.dscope) -> (!fir.box>, !fir.box>) + %2:2 = hlfir.declare %arg1 dummy_scope %0 {uniq_name = "_QFtestEs1"} : (!fir.ref, !fir.dscope) -> (!fir.ref, !fir.ref) + %3:2 = hlfir.declare %arg2 dummy_scope %0 {uniq_name = "_QFtestEs2"} : (!fir.ref, !fir.dscope) -> (!fir.ref, !fir.ref) + %4 = fir.load %3#0 : !fir.ref + %5 = fir.convert %4 : (i32) -> index + %6 = arith.divsi %5, %5 : index + %7 = arith.cmpi sgt, %6, %c0 : index + %8 = arith.select %7, %6, %c0 : index + %9 = fir.shape %8 : (index) -> !fir.shape<1> + %10 = hlfir.designate %1#0 (%c3:%c3:%5) shape %9 : (!fir.box>, index, index, index, !fir.shape<1>) -> !fir.box> + %11 = hlfir.elemental %9 unordered : (!fir.shape<1>) -> !hlfir.expr { + ^bb0(%arg3: index): + %19 = hlfir.designate %10 (%arg3) : (!fir.box>, index) -> !fir.ref + %20 = fir.load %19 : !fir.ref + %21 = arith.addi %20, %c1_i32 : i32 + hlfir.yield_element %21 : i32 + } + %12 = fir.load %2#0 : !fir.ref + %13 = fir.convert %12 : (i32) -> index + %14 = arith.divsi %13, %13 : index + %15 = arith.cmpi sgt, %14, %c0 : index + %16 = arith.select %15, %14, %c0 : index + %17 = fir.shape %16 : (index) -> !fir.shape<1> + %18 = hlfir.designate %1#0 (%c2:%c2:%13) shape %17 : (!fir.box>, index, index, index, !fir.shape<1>) -> !fir.box> + hlfir.assign %11 to %18 : !hlfir.expr, !fir.box> + hlfir.destroy %11 : !hlfir.expr + return +} +// CHECK-LABEL: func.func @test_disjoint_1element_unknown_strides( +// CHECK-NOT: hlfir.elemental + +// Check that 'a(x:y:1) = a(z:x-1:-1) + 1' is not optimized. +// The bounds are like in Polyhedron/nf, but the second +// stride is negative, so it cannot be optimized. 
+func.func @test_overlap_sub1_negative_stride(%arg0: !fir.box> {fir.bindc_name = "a"}, %arg1: !fir.ref {fir.bindc_name = "x"}, %arg2: !fir.ref {fir.bindc_name = "y"}, %arg3: !fir.ref {fir.bindc_name = "z"}) { + %c1 = arith.constant 1 : index + %c0 = arith.constant 0 : index + %c-1 = arith.constant -1 : index + %c1_i32 = arith.constant 1 : i32 + %0 = fir.dummy_scope : !fir.dscope + %1:2 = hlfir.declare %arg0 dummy_scope %0 {uniq_name = "_QFtestEa"} : (!fir.box>, !fir.dscope) -> (!fir.box>, !fir.box>) + %2:2 = hlfir.declare %arg1 dummy_scope %0 {uniq_name = "_QFtestEx"} : (!fir.ref, !fir.dscope) -> (!fir.ref, !fir.ref) + %3:2 = hlfir.declare %arg2 dummy_scope %0 {uniq_name = "_QFtestEy"} : (!fir.ref, !fir.dscope) -> (!fir.ref, !fir.ref) + %4:2 = hlfir.declare %arg3 dummy_scope %0 {uniq_name = "_QFtestEz"} : (!fir.ref, !fir.dscope) -> (!fir.ref, !fir.ref) + %5 = fir.load %4#0 : !fir.ref + %6 = fir.load %2#0 : !fir.ref + %7 = arith.subi %6, %c1_i32 overflow : i32 + %8 = fir.convert %5 : (i32) -> index + %9 = fir.convert %7 : (i32) -> index + %10 = arith.subi %9, %8 : index + %11 = arith.addi %10, %c-1 : index + %12 = arith.divsi %11, %c-1 : index + %13 = arith.cmpi sgt, %12, %c0 : index + %14 = arith.select %13, %12, %c0 : index + %15 = fir.shape %14 : (index) -> !fir.shape<1> + %16 = hlfir.designate %1#0 (%8:%9:%c-1) shape %15 : (!fir.box>, index, index, index, !fir.shape<1>) -> !fir.box> + %17 = hlfir.elemental %15 unordered : (!fir.shape<1>) -> !hlfir.expr { + ^bb0(%arg4: index): + %27 = hlfir.designate %16 (%arg4) : (!fir.box>, index) -> !fir.ref + %28 = fir.load %27 : !fir.ref + %29 = arith.addi %28, %c1_i32 : i32 + hlfir.yield_element %29 : i32 + } + %18 = fir.load %3#0 : !fir.ref + %19 = fir.convert %6 : (i32) -> index + %20 = fir.convert %18 : (i32) -> index + %21 = arith.subi %20, %19 : index + %22 = arith.addi %21, %c1 : index + %23 = arith.cmpi sgt, %22, %c0 : index + %24 = arith.select %23, %22, %c0 : index + %25 = fir.shape %24 : (index) -> !fir.shape<1> + 
%26 = hlfir.designate %1#0 (%19:%20:%c1) shape %25 : (!fir.box>, index, index, index, !fir.shape<1>) -> !fir.box> + hlfir.assign %17 to %26 : !hlfir.expr, !fir.box> + hlfir.destroy %17 : !hlfir.expr + return +} +// CHECK-LABEL: func.func @test_overlap_sub1_negative_stride( +// CHECK: hlfir.elemental + +// Check that 'x(1:5) = x(16:8:stride) + 1' is not optimized. +// TODO: because the bounds are known, we can still deduce +// no overlap: +// * If stride is negative, then (1:5) does not overlap +// with (8:16). +// * If stride is positive, then (16:8:stride) is an empty +// slice, thus it does not overlap with (1:5). +func.func @test_disjoint_known_bounds_unknown_stride(%arg0: !fir.box> {fir.bindc_name = "x"}, %arg1: !fir.ref {fir.bindc_name = "ub"}, %arg2: !fir.ref {fir.bindc_name = "stride"}) { + %c-8 = arith.constant -8 : index + %c5 = arith.constant 5 : index + %c1 = arith.constant 1 : index + %cst = arith.constant 1.000000e+00 : f32 + %c0 = arith.constant 0 : index + %c8 = arith.constant 8 : index + %c16 = arith.constant 16 : index + %0 = fir.dummy_scope : !fir.dscope + %1:2 = hlfir.declare %arg2 dummy_scope %0 {uniq_name = "_QFtestEstride"} : (!fir.ref, !fir.dscope) -> (!fir.ref, !fir.ref) + %2:2 = hlfir.declare %arg1 dummy_scope %0 {uniq_name = "_QFtestEub"} : (!fir.ref, !fir.dscope) -> (!fir.ref, !fir.ref) + %3:2 = hlfir.declare %arg0 dummy_scope %0 {uniq_name = "_QFtestEx"} : (!fir.box>, !fir.dscope) -> (!fir.box>, !fir.box>) + %4 = fir.load %1#0 : !fir.ref + %5 = fir.convert %4 : (i32) -> index + %6 = arith.addi %5, %c-8 : index + %7 = arith.divsi %6, %5 : index + %8 = arith.cmpi sgt, %7, %c0 : index + %9 = arith.select %8, %7, %c0 : index + %10 = fir.shape %9 : (index) -> !fir.shape<1> + %11 = hlfir.designate %3#0 (%c16:%c8:%5) shape %10 : (!fir.box>, index, index, index, !fir.shape<1>) -> !fir.box> + %12 = hlfir.elemental %10 unordered : (!fir.shape<1>) -> !hlfir.expr { + ^bb0(%arg3: index): + %15 = hlfir.designate %11 (%arg3) : (!fir.box>, index) -> 
!fir.ref + %16 = fir.load %15 : !fir.ref + %17 = arith.addf %16, %cst fastmath : f32 + hlfir.yield_element %17 : f32 + } + %13 = fir.shape %c5 : (index) -> !fir.shape<1> + %14 = hlfir.designate %3#0 (%c1:%c5:%c1) shape %13 : (!fir.box>, index, index, index, !fir.shape<1>) -> !fir.box> + hlfir.assign %12 to %14 : !hlfir.expr, !fir.box> + hlfir.destroy %12 : !hlfir.expr + return +} +// CHECK-LABEL: func.func @test_disjoint_known_bounds_unknown_stride( +// CHECK: hlfir.elemental