From cca08523984f96b05ba6f2317d8157f0cb82b339 Mon Sep 17 00:00:00 2001 From: Slava Zakharin Date: Thu, 5 Jun 2025 15:33:59 -0700 Subject: [PATCH] [flang] Relax conflicts detection in ElementalAssignBufferization. If there is a read-effect operation inside `hlfir.elemental`, there is no reason to block moving it to the assignment point unless there are write-effect operations between the elemental and the assignment. The previous code was disallowing the optimization even if there were only read-effect operations in between. This case is from 465.tonto, though this change does not improve performance at all. --- .../Transforms/OptimizedBufferization.cpp | 30 +++++++--- flang/test/HLFIR/opt-bufferization-tonto.fir | 59 +++++++++++++++++++ 2 files changed, 80 insertions(+), 9 deletions(-) create mode 100644 flang/test/HLFIR/opt-bufferization-tonto.fir diff --git a/flang/lib/Optimizer/HLFIR/Transforms/OptimizedBufferization.cpp b/flang/lib/Optimizer/HLFIR/Transforms/OptimizedBufferization.cpp index e2ca754a1817a..1af6e014a8187 100644 --- a/flang/lib/Optimizer/HLFIR/Transforms/OptimizedBufferization.cpp +++ b/flang/lib/Optimizer/HLFIR/Transforms/OptimizedBufferization.cpp @@ -572,13 +572,11 @@ ElementalAssignBufferization::findMatch(hlfir::ElementalOp elemental) { // hlfir.assign, check there are no effects which make this unsafe // keep track of any values written to in the elemental, as these can't be - // read from between the elemental and the assignment + // read from or written to between the elemental and the assignment + mlir::SmallVector notToBeAccessedBeforeAssign; // likewise, values read in the elemental cannot be written to between the // elemental and the assign - mlir::SmallVector notToBeAccessedBeforeAssign; - // any accesses to the array between the array and the assignment means it - // would be unsafe to move the elemental to the assignment - notToBeAccessedBeforeAssign.push_back(match.array); + mlir::SmallVector notToBeWrittenBeforeAssign; // 1) side effects in the elemental body - it isn't sufficient to just look // for ordered elementals because we also cannot support out of order reads @@ -593,10 +591,12 @@ ElementalAssignBufferization::findMatch(hlfir::ElementalOp elemental) { for (const mlir::MemoryEffects::EffectInstance &effect : *effects) { mlir::AliasResult res = containsReadOrWriteEffectOn(effect, match.array); if (res.isNo()) { - if (mlir::isa( - effect.getEffect())) - if (effect.getValue()) + if (effect.getValue()) { + if (mlir::isa(effect.getEffect())) notToBeAccessedBeforeAssign.push_back(effect.getValue()); + else if (mlir::isa(effect.getEffect())) + notToBeWrittenBeforeAssign.push_back(effect.getValue()); + } // this is safe in the elemental continue; @@ -669,11 +669,23 @@ ElementalAssignBufferization::findMatch(hlfir::ElementalOp elemental) { mlir::AliasResult res = containsReadOrWriteEffectOn(effect, val); if (!res.isNo()) { LLVM_DEBUG(llvm::dbgs() - << "diasllowed side-effect: " << effect.getValue() << " for " + << "disallowed side-effect: " << effect.getValue() << " for " << elemental.getLoc() << "\n"); return std::nullopt; } } + // Anything that is read inside the elemental can only be safely read + // between the elemental and the assignment. + for (mlir::Value val : notToBeWrittenBeforeAssign) { + mlir::AliasResult res = containsReadOrWriteEffectOn(effect, val); + if (!res.isNo() && + !mlir::isa(effect.getEffect())) { + LLVM_DEBUG(llvm::dbgs() + << "disallowed non-read side-effect: " << effect.getValue() + << " for " << elemental.getLoc() << "\n"); + return std::nullopt; + } + } } return match; diff --git a/flang/test/HLFIR/opt-bufferization-tonto.fir b/flang/test/HLFIR/opt-bufferization-tonto.fir new file mode 100644 index 0000000000000..df30a90bc6ecf --- /dev/null +++ b/flang/test/HLFIR/opt-bufferization-tonto.fir @@ -0,0 +1,59 @@ +// RUN: fir-opt --opt-bufferization %s | FileCheck %s + +// tonto case where optimized bufferization conservatively +// did not pull elemental into the assignment due to +// the designate op in between: +// module test +// type my_type +// real, dimension(:,:), pointer :: p => null() +// end type my_type +// contains +// subroutine assign1(self, i) +// type(my_type) :: self +// real, dimension(3) :: ct, q +// integer :: i +// self%p(:,i) = q - ct +// end subroutine assign1 +// end module test + +// CHECK-LABEL: func.func @_QMtestPassign1( +// CHECK-NOT: hlfir.elemental +// CHECK-NOT: hlfir.assign{{.*}}array +func.func @_QMtestPassign1(%arg0: !fir.ref>>}>> {fir.bindc_name = "self"}, %arg1: !fir.ref {fir.bindc_name = "i"}) { + %c1 = arith.constant 1 : index + %c0 = arith.constant 0 : index + %c3 = arith.constant 3 : index + %0 = fir.dummy_scope : !fir.dscope + %1 = fir.alloca !fir.array<3xf32> {bindc_name = "ct", uniq_name = "_QMtestFassign1Ect"} + %2 = fir.shape %c3 : (index) -> !fir.shape<1> + %3:2 = hlfir.declare %1(%2) {uniq_name = "_QMtestFassign1Ect"} : (!fir.ref>, !fir.shape<1>) -> (!fir.ref>, !fir.ref>) + %4:2 = hlfir.declare %arg1 dummy_scope %0 {uniq_name = "_QMtestFassign1Ei"} : (!fir.ref, !fir.dscope) -> (!fir.ref, !fir.ref) + %5 = fir.alloca !fir.array<3xf32> {bindc_name = "q", uniq_name = "_QMtestFassign1Eq"} + %6:2 = hlfir.declare %5(%2) {uniq_name = "_QMtestFassign1Eq"} : (!fir.ref>, !fir.shape<1>) -> (!fir.ref>, !fir.ref>) + %7:2 = hlfir.declare %arg0 dummy_scope %0 {uniq_name = "_QMtestFassign1Eself"} : (!fir.ref>>}>>, !fir.dscope) -> (!fir.ref>>}>>, !fir.ref>>}>>) + %8 = hlfir.elemental %2 unordered : (!fir.shape<1>) -> !hlfir.expr<3xf32> { + ^bb0(%arg2: index): + %22 = hlfir.designate %6#0 (%arg2) : (!fir.ref>, index) -> !fir.ref + %23 = hlfir.designate %3#0 (%arg2) : (!fir.ref>, index) -> !fir.ref + %24 = fir.load %22 : !fir.ref + %25 = fir.load %23 : !fir.ref + %26 = arith.subf %24, %25 fastmath : f32 + hlfir.yield_element %26 : f32 + } + %9 = hlfir.designate %7#0{"p"} {fortran_attrs = #fir.var_attrs} : (!fir.ref>>}>>) -> !fir.ref>>> + %10 = fir.load %9 : !fir.ref>>> + %11:3 = fir.box_dims %10, %c0 : (!fir.box>>, index) -> (index, index, index) + %12 = arith.addi %11#0, %11#1 : index + %13 = arith.subi %12, %c1 : index + %14 = arith.subi %13, %11#0 : index + %15 = arith.addi %14, %c1 : index + %16 = arith.cmpi sgt, %15, %c0 : index + %17 = arith.select %16, %15, %c0 : index + %18 = fir.load %4#0 : !fir.ref + %19 = fir.convert %18 : (i32) -> i64 + %20 = fir.shape %17 : (index) -> !fir.shape<1> + %21 = hlfir.designate %10 (%11#0:%13:%c1, %19) shape %20 : (!fir.box>>, index, index, index, i64, !fir.shape<1>) -> !fir.box> + hlfir.assign %8 to %21 : !hlfir.expr<3xf32>, !fir.box> + hlfir.destroy %8 : !hlfir.expr<3xf32> + return +}