diff --git a/flang/lib/Optimizer/HLFIR/Transforms/OptimizedBufferization.cpp b/flang/lib/Optimizer/HLFIR/Transforms/OptimizedBufferization.cpp index 9327e7ad5875c..ef6aabbceacb7 100644 --- a/flang/lib/Optimizer/HLFIR/Transforms/OptimizedBufferization.cpp +++ b/flang/lib/Optimizer/HLFIR/Transforms/OptimizedBufferization.cpp @@ -338,34 +338,20 @@ ElementalAssignBufferization::findMatch(hlfir::ElementalOp elemental) { if (!fir::isa_trivial(eleTy)) return std::nullopt; - // the array must have the same shape as the elemental. CSE should have - // deduplicated the fir.shape operations where they are provably the same - // so we just have to check for the same ssa value - // TODO: add more ways of getting the shape of the array - mlir::Value arrayShape; - if (match.array.getDefiningOp()) - arrayShape = - mlir::TypeSwitch( - match.array.getDefiningOp()) - .Case([](hlfir::DesignateOp designate) { - return designate.getShape(); - }) - .Case([](hlfir::DeclareOp declare) { return declare.getShape(); }) - .Default([](mlir::Operation *) { return mlir::Value{}; }); - if (!arrayShape) { - LLVM_DEBUG(llvm::dbgs() << "Can't get shape of " << match.array << " at " - << elemental->getLoc() << "\n"); + // The array must have the same shape as the elemental. + // + // f2018 10.2.1.2 (3) requires the lhs and rhs of an assignment to be + // conformable unless the lhs is an allocatable array. In HLFIR we can + // see this from the presence or absence of the realloc attribute on + // hlfir.assign. If it is not a realloc assignment, we can trust that + // the shapes do conform. + // + // TODO: the lhs's shape is dynamic, so it is hard to prove that + // there is no reallocation of the lhs due to the assignment. + // We can probably try generating multiple versions of the code + // with checking for the shape match, length parameters match, etc. + if (match.assign.getRealloc()) return std::nullopt; - } - if (arrayShape != elemental.getShape()) { - // f2018 10.2.1.2 (3) requires the lhs and rhs of an assignment to be - // conformable unless the lhs is an allocatable array. In HLFIR we can - // see this from the presence or absence of the realloc attribute on - // hlfir.assign. If it is not a realloc assignment, we can trust that - // the shapes do conform - if (match.assign.getRealloc()) - return std::nullopt; - } // the transformation wants to apply the elemental in a do-loop at the // hlfir.assign, check there are no effects which make this unsafe diff --git a/flang/test/HLFIR/minloc-elemental.fir b/flang/test/HLFIR/minloc-elemental.fir index 45993c5eee0c9..5fa482a7b904e 100644 --- a/flang/test/HLFIR/minloc-elemental.fir +++ b/flang/test/HLFIR/minloc-elemental.fir @@ -188,67 +188,65 @@ func.func @_QPtest_kind2_convert(%arg0: !fir.box> {fir.bindc_n hlfir.destroy %6 : !hlfir.expr> return } -// The minloc has other uses, not an assign that gets optimized out. -// CHECK-LABEL: _QPtest_kind2_convert -// CHECK-SAME: (%arg0: !fir.box> {fir.bindc_name = "array"}, %arg1: !fir.ref {fir.bindc_name = "val"}, %arg2: !fir.box> {fir.bindc_name = "m"}) { -// CHECK-NEXT: %false = arith.constant false -// CHECK-NEXT: %true = arith.constant true -// CHECK-NEXT: %c2147483647_i32 = arith.constant 2147483647 : i32 -// CHECK-NEXT: %c1_i16 = arith.constant 1 : i16 -// CHECK-NEXT: %c0 = arith.constant 0 : index -// CHECK-NEXT: %c0_i16 = arith.constant 0 : i16 -// CHECK-NEXT: %c1 = arith.constant 1 : index -// CHECK-NEXT: %[[V0:.*]] = fir.alloca i16 -// CHECK-NEXT: %[[V1:.*]] = fir.alloca !fir.array<1xi16> -// CHECK-NEXT: %[[V2:.*]]:2 = hlfir.declare %arg0 {uniq_name = "_QFtestEarray"} : (!fir.box>) -> (!fir.box>, !fir.box>) -// CHECK-NEXT: %[[V3:.*]]:2 = hlfir.declare %arg2 {uniq_name = "_QFtestEm"} : (!fir.box>) -> (!fir.box>, !fir.box>) -// CHECK-NEXT: %[[V4:.*]]:2 = hlfir.declare %arg1 {uniq_name = "_QFtestEval"} : (!fir.ref) -> (!fir.ref, !fir.ref) -// CHECK-NEXT: %[[V5:.*]] = fir.load %[[V4]]#0 : !fir.ref -// CHECK-NEXT: %[[V6:.*]] = hlfir.designate %[[V1]] (%c1) : (!fir.ref>, index) -> !fir.ref -// CHECK-NEXT: fir.store %c0_i16 to %[[V6]] : !fir.ref -// CHECK-NEXT: fir.store %c0_i16 to %[[V0]] : !fir.ref -// CHECK-NEXT: %[[V7:.*]]:3 = fir.box_dims %[[V2]]#0, %c0 : (!fir.box>, index) -> (index, index, index) -// CHECK-NEXT: %[[V8:.*]] = arith.subi %[[V7]]#1, %c1 : index -// CHECK-NEXT: %[[V9:.*]] = fir.do_loop %arg3 = %c0 to %[[V8]] step %c1 iter_args(%arg4 = %c2147483647_i32) -> (i32) { -// CHECK-NEXT: %[[V15:.*]] = arith.addi %arg3, %c1 : index -// CHECK-NEXT: %[[V16:.*]] = hlfir.designate %[[V2]]#0 (%[[V15]]) : (!fir.box>, index) -> !fir.ref -// CHECK-NEXT: %[[V17:.*]] = fir.load %[[V16]] : !fir.ref -// CHECK-NEXT: %[[V18:.*]] = arith.cmpi sge, %[[V17]], %[[V5]] : i32 -// CHECK-NEXT: %[[V19:.*]] = fir.if %[[V18]] -> (i32) { -// CHECK-NEXT: %[[ISFIRST:.*]] = fir.load %[[V0]] : !fir.ref -// CHECK-NEXT: %[[V23:.*]] = hlfir.designate %[[V2]]#0 (%[[V15]]) : (!fir.box>, index) -> !fir.ref -// CHECK-NEXT: %[[V24:.*]] = fir.load %[[V23]] : !fir.ref -// CHECK-NEXT: %[[V25:.*]] = arith.cmpi slt, %[[V24]], %arg4 : i32 -// CHECK-NEXT: %[[ISFIRSTL:.*]] = fir.convert %[[ISFIRST]] : (i16) -> i1 -// CHECK-NEXT: %[[ISFIRSTNOT:.*]] = arith.xori %[[ISFIRSTL]], %true : i1 -// CHECK-NEXT: %[[ORCOND:.*]] = arith.ori %[[V25]], %[[ISFIRSTNOT]] : i1 -// CHECK-NEXT: %[[V26:.*]] = fir.if %[[ORCOND]] -> (i32) { -// CHECK-NEXT: fir.store %c1_i16 to %[[V0]] : !fir.ref -// CHECK-NEXT: %[[V27:.*]] = hlfir.designate %[[V1]] (%c1) : (!fir.ref>, index) -> !fir.ref -// CHECK-NEXT: %[[V28:.*]] = fir.convert %[[V15]] : (index) -> i16 -// CHECK-NEXT: fir.store %[[V28]] to %[[V27]] : !fir.ref -// CHECK-NEXT: fir.result %[[V24]] : i32 -// CHECK-NEXT: } else { -// CHECK-NEXT: fir.result %arg4 : i32 -// CHECK-NEXT: } -// CHECK-NEXT: fir.result %[[V26]] : i32 -// CHECK-NEXT: } else { -// CHECK-NEXT: fir.result %arg4 : i32 -// CHECK-NEXT: } -// CHECK-NEXT: fir.result %[[V19]] : i32 -// CHECK-NEXT: } -// CHECK-NEXT: %[[V12:.*]] = hlfir.as_expr %[[V1]] move %false : (!fir.ref>, i1) -> !hlfir.expr<1xi16> -// CHECK-NEXT: %[[V13:.*]] = fir.shape %c1 : (index) -> !fir.shape<1> -// CHECK-NEXT: %[[V14:.*]] = hlfir.elemental %[[V13]] unordered : (!fir.shape<1>) -> !hlfir.expr { -// CHECK-NEXT: ^bb0(%arg3: index): -// CHECK-NEXT: %[[V15:.*]] = hlfir.apply %[[V12]], %arg3 : (!hlfir.expr<1xi16>, index) -> i16 -// CHECK-NEXT: %[[V16:.*]] = fir.convert %[[V15]] : (i16) -> i32 -// CHECK-NEXT: hlfir.yield_element %[[V16]] : i32 -// CHECK-NEXT: } -// CHECK-NEXT: hlfir.assign %[[V14]] to %[[V3]]#0 : !hlfir.expr, !fir.box> -// CHECK-NEXT: hlfir.destroy %[[V14]] : !hlfir.expr -// CHECK-NEXT: return - +// CHECK-LABEL: func.func @_QPtest_kind2_convert( +// CHECK-SAME: %[[VAL_0:.*]]: !fir.box> {fir.bindc_name = "array"}, +// CHECK-SAME: %[[VAL_1:.*]]: !fir.ref {fir.bindc_name = "val"}, +// CHECK-SAME: %[[VAL_2:.*]]: !fir.box> {fir.bindc_name = "m"}) { +// CHECK: %[[VAL_3:.*]] = arith.constant false +// CHECK: %[[VAL_4:.*]] = arith.constant true +// CHECK: %[[VAL_5:.*]] = arith.constant 2147483647 : i32 +// CHECK: %[[VAL_6:.*]] = arith.constant 1 : i16 +// CHECK: %[[VAL_7:.*]] = arith.constant 0 : index +// CHECK: %[[VAL_8:.*]] = arith.constant 0 : i16 +// CHECK: %[[VAL_9:.*]] = arith.constant 1 : index +// CHECK: %[[VAL_10:.*]] = fir.alloca i16 +// CHECK: %[[VAL_11:.*]] = fir.alloca !fir.array<1xi16> +// CHECK: %[[VAL_12:.*]]:2 = hlfir.declare %[[VAL_0]] {uniq_name = "_QFtestEarray"} : (!fir.box>) -> (!fir.box>, !fir.box>) +// CHECK: %[[VAL_13:.*]]:2 = hlfir.declare %[[VAL_2]] {uniq_name = "_QFtestEm"} : (!fir.box>) -> (!fir.box>, !fir.box>) +// CHECK: %[[VAL_14:.*]]:2 = hlfir.declare %[[VAL_1]] {uniq_name = "_QFtestEval"} : (!fir.ref) -> (!fir.ref, !fir.ref) +// CHECK: %[[VAL_15:.*]] = fir.load %[[VAL_14]]#0 : !fir.ref +// CHECK: %[[VAL_16:.*]] = hlfir.designate %[[VAL_11]] (%[[VAL_9]]) : (!fir.ref>, index) -> !fir.ref +// CHECK: fir.store %[[VAL_8]] to %[[VAL_16]] : !fir.ref +// CHECK: fir.store %[[VAL_8]] to %[[VAL_10]] : !fir.ref +// CHECK: %[[VAL_17:.*]]:3 = fir.box_dims %[[VAL_12]]#0, %[[VAL_7]] : (!fir.box>, index) -> (index, index, index) +// CHECK: %[[VAL_18:.*]] = arith.subi %[[VAL_17]]#1, %[[VAL_9]] : index +// CHECK: %[[VAL_19:.*]] = fir.do_loop %[[VAL_20:.*]] = %[[VAL_7]] to %[[VAL_18]] step %[[VAL_9]] iter_args(%[[VAL_21:.*]] = %[[VAL_5]]) -> (i32) { +// CHECK: %[[VAL_22:.*]] = arith.addi %[[VAL_20]], %[[VAL_9]] : index +// CHECK: %[[VAL_23:.*]] = hlfir.designate %[[VAL_12]]#0 (%[[VAL_22]]) : (!fir.box>, index) -> !fir.ref +// CHECK: %[[VAL_24:.*]] = fir.load %[[VAL_23]] : !fir.ref +// CHECK: %[[VAL_25:.*]] = arith.cmpi sge, %[[VAL_24]], %[[VAL_15]] : i32 +// CHECK: %[[VAL_26:.*]] = fir.if %[[VAL_25]] -> (i32) { +// CHECK: %[[VAL_27:.*]] = fir.load %[[VAL_10]] : !fir.ref +// CHECK: %[[VAL_28:.*]] = hlfir.designate %[[VAL_12]]#0 (%[[VAL_22]]) : (!fir.box>, index) -> !fir.ref +// CHECK: %[[VAL_29:.*]] = fir.load %[[VAL_28]] : !fir.ref +// CHECK: %[[VAL_30:.*]] = arith.cmpi slt, %[[VAL_29]], %[[VAL_21]] : i32 +// CHECK: %[[VAL_31:.*]] = fir.convert %[[VAL_27]] : (i16) -> i1 +// CHECK: %[[VAL_32:.*]] = arith.xori %[[VAL_31]], %[[VAL_4]] : i1 +// CHECK: %[[VAL_33:.*]] = arith.ori %[[VAL_30]], %[[VAL_32]] : i1 +// CHECK: %[[VAL_34:.*]] = fir.if %[[VAL_33]] -> (i32) { +// CHECK: fir.store %[[VAL_6]] to %[[VAL_10]] : !fir.ref +// CHECK: %[[VAL_35:.*]] = hlfir.designate %[[VAL_11]] (%[[VAL_9]]) : (!fir.ref>, index) -> !fir.ref +// CHECK: %[[VAL_36:.*]] = fir.convert %[[VAL_22]] : (index) -> i16 +// CHECK: fir.store %[[VAL_36]] to %[[VAL_35]] : !fir.ref +// CHECK: fir.result %[[VAL_29]] : i32 +// CHECK: } else { +// CHECK: fir.result %[[VAL_21]] : i32 +// CHECK: } +// CHECK: fir.result %[[VAL_34]] : i32 +// CHECK: } else { +// CHECK: fir.result %[[VAL_21]] : i32 +// CHECK: } +// CHECK: fir.result %[[VAL_26]] : i32 +// CHECK: } +// CHECK: %[[VAL_37:.*]] = hlfir.as_expr %[[VAL_11]] move %[[VAL_3]] : (!fir.ref>, i1) -> !hlfir.expr<1xi16> +// CHECK: fir.do_loop %[[VAL_38:.*]] = %[[VAL_9]] to %[[VAL_9]] step %[[VAL_9]] unordered { +// CHECK: %[[VAL_39:.*]] = hlfir.apply %[[VAL_37]], %[[VAL_38]] : (!hlfir.expr<1xi16>, index) -> i16 +// CHECK: %[[VAL_40:.*]] = fir.convert %[[VAL_39]] : (i16) -> i32 +// CHECK: %[[VAL_41:.*]] = hlfir.designate %[[VAL_13]]#0 (%[[VAL_38]]) : (!fir.box>, index) -> !fir.ref +// CHECK: hlfir.assign %[[VAL_40]] to %[[VAL_41]] : i32, !fir.ref +// CHECK: } +// CHECK: return +// CHECK: } func.func @_QPtest_float(%arg0: !fir.box> {fir.bindc_name = "array"}, %arg1: !fir.ref {fir.bindc_name = "val"}, %arg2: !fir.box> {fir.bindc_name = "m"}) { diff --git a/flang/test/HLFIR/opt-bufferization-non-realloc-assignment.fir b/flang/test/HLFIR/opt-bufferization-non-realloc-assignment.fir new file mode 100644 index 0000000000000..cc65dec01cc3c --- /dev/null +++ b/flang/test/HLFIR/opt-bufferization-non-realloc-assignment.fir @@ -0,0 +1,50 @@ +// RUN: fir-opt --opt-bufferization %s | FileCheck %s + +// Verify that the shape match is not required for optimizing +// elemental assignment, when lhs not an allocatable. +// The shapes of lhs and rhs must conform in a legal program. +// +// Example: +// subroutine test(a,b) +// integer :: a(:), b(:) +// a = b + 1 +// end subroutine test + +func.func @_QPtest(%arg0: !fir.box> {fir.bindc_name = "a"}, %arg1: !fir.box> {fir.bindc_name = "b"}) { + %c0 = arith.constant 0 : index + %c1_i32 = arith.constant 1 : i32 + %0 = fir.dummy_scope : !fir.dscope + %1:2 = hlfir.declare %arg0 dummy_scope %0 {uniq_name = "_QFtestEa"} : (!fir.box>, !fir.dscope) -> (!fir.box>, !fir.box>) + %2:2 = hlfir.declare %arg1 dummy_scope %0 {uniq_name = "_QFtestEb"} : (!fir.box>, !fir.dscope) -> (!fir.box>, !fir.box>) + %3:3 = fir.box_dims %2#0, %c0 : (!fir.box>, index) -> (index, index, index) + %4 = fir.shape %3#1 : (index) -> !fir.shape<1> + %5 = hlfir.elemental %4 unordered : (!fir.shape<1>) -> !hlfir.expr { + ^bb0(%arg2: index): + %6 = hlfir.designate %2#0 (%arg2) : (!fir.box>, index) -> !fir.ref + %7 = fir.load %6 : !fir.ref + %8 = arith.addi %7, %c1_i32 : i32 + hlfir.yield_element %8 : i32 + } + hlfir.assign %5 to %1#0 : !hlfir.expr, !fir.box> + hlfir.destroy %5 : !hlfir.expr + return +} +// CHECK-LABEL: func.func @_QPtest( +// CHECK-SAME: %[[VAL_0:.*]]: !fir.box> {fir.bindc_name = "a"}, +// CHECK-SAME: %[[VAL_1:.*]]: !fir.box> {fir.bindc_name = "b"}) { +// CHECK: %[[VAL_2:.*]] = arith.constant 1 : index +// CHECK: %[[VAL_3:.*]] = arith.constant 0 : index +// CHECK: %[[VAL_4:.*]] = arith.constant 1 : i32 +// CHECK: %[[VAL_5:.*]] = fir.dummy_scope : !fir.dscope +// CHECK: %[[VAL_6:.*]]:2 = hlfir.declare %[[VAL_0]] dummy_scope %[[VAL_5]] {uniq_name = "_QFtestEa"} : (!fir.box>, !fir.dscope) -> (!fir.box>, !fir.box>) +// CHECK: %[[VAL_7:.*]]:2 = hlfir.declare %[[VAL_1]] dummy_scope %[[VAL_5]] {uniq_name = "_QFtestEb"} : (!fir.box>, !fir.dscope) -> (!fir.box>, !fir.box>) +// CHECK: %[[VAL_8:.*]]:3 = fir.box_dims %[[VAL_7]]#0, %[[VAL_3]] : (!fir.box>, index) -> (index, index, index) +// CHECK: fir.do_loop %[[VAL_9:.*]] = %[[VAL_2]] to %[[VAL_8]]#1 step %[[VAL_2]] unordered { +// CHECK: %[[VAL_10:.*]] = hlfir.designate %[[VAL_7]]#0 (%[[VAL_9]]) : (!fir.box>, index) -> !fir.ref +// CHECK: %[[VAL_11:.*]] = fir.load %[[VAL_10]] : !fir.ref +// CHECK: %[[VAL_12:.*]] = arith.addi %[[VAL_11]], %[[VAL_4]] : i32 +// CHECK: %[[VAL_13:.*]] = hlfir.designate %[[VAL_6]]#0 (%[[VAL_9]]) : (!fir.box>, index) -> !fir.ref +// CHECK: hlfir.assign %[[VAL_12]] to %[[VAL_13]] : i32, !fir.ref +// CHECK: } +// CHECK: return +// CHECK: } diff --git a/flang/test/Integration/OpenMP/workshare-axpy.f90 b/flang/test/Integration/OpenMP/workshare-axpy.f90 index 0c4524f855290..12246e54d3432 100644 --- a/flang/test/Integration/OpenMP/workshare-axpy.f90 +++ b/flang/test/Integration/OpenMP/workshare-axpy.f90 @@ -13,7 +13,7 @@ subroutine sb1(a, x, y, z) integer :: a integer :: x(:) integer :: y(:) - integer :: z(:) + integer, allocatable :: z(:) !$omp parallel workshare z = a * x + y !$omp end parallel workshare @@ -43,7 +43,7 @@ subroutine sb1(a, x, y, z) ! FIR: func.func @_QPsb1 ! FIR: omp.parallel { -! FIR: omp.single copyprivate(%9 -> @_workshare_copy_i32 : !fir.ref, %10 -> @_workshare_copy_heap_Uxi32 : !fir.ref>>) { +! FIR: omp.single copyprivate(%{{[a-z0-9]+}} -> @_workshare_copy_i32 : !fir.ref, %{{[a-z0-9]+}} -> @_workshare_copy_heap_Uxi32 : !fir.ref>>) { ! FIR: fir.allocmem ! FIR: omp.wsloop { ! FIR: omp.loop_nest