-
Notifications
You must be signed in to change notification settings - Fork 15.2k
[flang] Relax conflicts detection in ElementalAssignBufferization. #143045
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
If there is a read-effect operation inside `hlfir.elemental`, there is no reason to block moving it to the assignment point unless there are write-effect operations between the elemental and the assignment. The previous code was disallowing the optimization even if there were only read-effect operations in between. This case is from 465.tonto, though this change does not improve performance at all.
|
@llvm/pr-subscribers-flang-fir-hlfir Author: Slava Zakharin (vzakhari) ChangesIf there is a read-effect operation inside This case is from 465.tonto, though this change does not improve Full diff: https://github.com/llvm/llvm-project/pull/143045.diff 2 Files Affected:
diff --git a/flang/lib/Optimizer/HLFIR/Transforms/OptimizedBufferization.cpp b/flang/lib/Optimizer/HLFIR/Transforms/OptimizedBufferization.cpp
index e2ca754a1817a..1af6e014a8187 100644
--- a/flang/lib/Optimizer/HLFIR/Transforms/OptimizedBufferization.cpp
+++ b/flang/lib/Optimizer/HLFIR/Transforms/OptimizedBufferization.cpp
@@ -572,13 +572,11 @@ ElementalAssignBufferization::findMatch(hlfir::ElementalOp elemental) {
// hlfir.assign, check there are no effects which make this unsafe
// keep track of any values written to in the elemental, as these can't be
- // read from between the elemental and the assignment
+ // read from or written to between the elemental and the assignment
+ mlir::SmallVector<mlir::Value, 1> notToBeAccessedBeforeAssign;
// likewise, values read in the elemental cannot be written to between the
// elemental and the assign
- mlir::SmallVector<mlir::Value, 1> notToBeAccessedBeforeAssign;
- // any accesses to the array between the array and the assignment means it
- // would be unsafe to move the elemental to the assignment
- notToBeAccessedBeforeAssign.push_back(match.array);
+ mlir::SmallVector<mlir::Value, 1> notToBeWrittenBeforeAssign;
// 1) side effects in the elemental body - it isn't sufficient to just look
// for ordered elementals because we also cannot support out of order reads
@@ -593,10 +591,12 @@ ElementalAssignBufferization::findMatch(hlfir::ElementalOp elemental) {
for (const mlir::MemoryEffects::EffectInstance &effect : *effects) {
mlir::AliasResult res = containsReadOrWriteEffectOn(effect, match.array);
if (res.isNo()) {
- if (mlir::isa<mlir::MemoryEffects::Write, mlir::MemoryEffects::Read>(
- effect.getEffect()))
- if (effect.getValue())
+ if (effect.getValue()) {
+ if (mlir::isa<mlir::MemoryEffects::Write>(effect.getEffect()))
notToBeAccessedBeforeAssign.push_back(effect.getValue());
+ else if (mlir::isa<mlir::MemoryEffects::Read>(effect.getEffect()))
+ notToBeWrittenBeforeAssign.push_back(effect.getValue());
+ }
// this is safe in the elemental
continue;
@@ -669,11 +669,23 @@ ElementalAssignBufferization::findMatch(hlfir::ElementalOp elemental) {
mlir::AliasResult res = containsReadOrWriteEffectOn(effect, val);
if (!res.isNo()) {
LLVM_DEBUG(llvm::dbgs()
- << "diasllowed side-effect: " << effect.getValue() << " for "
+ << "disallowed side-effect: " << effect.getValue() << " for "
<< elemental.getLoc() << "\n");
return std::nullopt;
}
}
+ // Anything that is read inside the elemental can only be safely read
+ // between the elemental and the assignment.
+ for (mlir::Value val : notToBeWrittenBeforeAssign) {
+ mlir::AliasResult res = containsReadOrWriteEffectOn(effect, val);
+ if (!res.isNo() &&
+ !mlir::isa<mlir::MemoryEffects::Read>(effect.getEffect())) {
+ LLVM_DEBUG(llvm::dbgs()
+ << "disallowed non-read side-effect: " << effect.getValue()
+ << " for " << elemental.getLoc() << "\n");
+ return std::nullopt;
+ }
+ }
}
return match;
diff --git a/flang/test/HLFIR/opt-bufferization-tonto.fir b/flang/test/HLFIR/opt-bufferization-tonto.fir
new file mode 100644
index 0000000000000..df30a90bc6ecf
--- /dev/null
+++ b/flang/test/HLFIR/opt-bufferization-tonto.fir
@@ -0,0 +1,59 @@
+// RUN: fir-opt --opt-bufferization %s | FileCheck %s
+
+// tonto case where optimized bufferization conservatively
+// did not pull elemental into the assignment due to
+// the designate op in between:
+// module test
+// type my_type
+// real, dimension(:,:), pointer :: p => null()
+// end type my_type
+// contains
+// subroutine assign1(self, i)
+// type(my_type) :: self
+// real, dimension(3) :: ct, q
+// integer :: i
+// self%p(:,i) = q - ct
+// end subroutine assign1
+// end module test
+
+// CHECK-LABEL: func.func @_QMtestPassign1(
+// CHECK-NOT: hlfir.elemental
+// CHECK-NOT: hlfir.assign{{.*}}array
+func.func @_QMtestPassign1(%arg0: !fir.ref<!fir.type<_QMtestTmy_type{p:!fir.box<!fir.ptr<!fir.array<?x?xf32>>>}>> {fir.bindc_name = "self"}, %arg1: !fir.ref<i32> {fir.bindc_name = "i"}) {
+ %c1 = arith.constant 1 : index
+ %c0 = arith.constant 0 : index
+ %c3 = arith.constant 3 : index
+ %0 = fir.dummy_scope : !fir.dscope
+ %1 = fir.alloca !fir.array<3xf32> {bindc_name = "ct", uniq_name = "_QMtestFassign1Ect"}
+ %2 = fir.shape %c3 : (index) -> !fir.shape<1>
+ %3:2 = hlfir.declare %1(%2) {uniq_name = "_QMtestFassign1Ect"} : (!fir.ref<!fir.array<3xf32>>, !fir.shape<1>) -> (!fir.ref<!fir.array<3xf32>>, !fir.ref<!fir.array<3xf32>>)
+ %4:2 = hlfir.declare %arg1 dummy_scope %0 {uniq_name = "_QMtestFassign1Ei"} : (!fir.ref<i32>, !fir.dscope) -> (!fir.ref<i32>, !fir.ref<i32>)
+ %5 = fir.alloca !fir.array<3xf32> {bindc_name = "q", uniq_name = "_QMtestFassign1Eq"}
+ %6:2 = hlfir.declare %5(%2) {uniq_name = "_QMtestFassign1Eq"} : (!fir.ref<!fir.array<3xf32>>, !fir.shape<1>) -> (!fir.ref<!fir.array<3xf32>>, !fir.ref<!fir.array<3xf32>>)
+ %7:2 = hlfir.declare %arg0 dummy_scope %0 {uniq_name = "_QMtestFassign1Eself"} : (!fir.ref<!fir.type<_QMtestTmy_type{p:!fir.box<!fir.ptr<!fir.array<?x?xf32>>>}>>, !fir.dscope) -> (!fir.ref<!fir.type<_QMtestTmy_type{p:!fir.box<!fir.ptr<!fir.array<?x?xf32>>>}>>, !fir.ref<!fir.type<_QMtestTmy_type{p:!fir.box<!fir.ptr<!fir.array<?x?xf32>>>}>>)
+ %8 = hlfir.elemental %2 unordered : (!fir.shape<1>) -> !hlfir.expr<3xf32> {
+ ^bb0(%arg2: index):
+ %22 = hlfir.designate %6#0 (%arg2) : (!fir.ref<!fir.array<3xf32>>, index) -> !fir.ref<f32>
+ %23 = hlfir.designate %3#0 (%arg2) : (!fir.ref<!fir.array<3xf32>>, index) -> !fir.ref<f32>
+ %24 = fir.load %22 : !fir.ref<f32>
+ %25 = fir.load %23 : !fir.ref<f32>
+ %26 = arith.subf %24, %25 fastmath<contract> : f32
+ hlfir.yield_element %26 : f32
+ }
+ %9 = hlfir.designate %7#0{"p"} {fortran_attrs = #fir.var_attrs<pointer>} : (!fir.ref<!fir.type<_QMtestTmy_type{p:!fir.box<!fir.ptr<!fir.array<?x?xf32>>>}>>) -> !fir.ref<!fir.box<!fir.ptr<!fir.array<?x?xf32>>>>
+ %10 = fir.load %9 : !fir.ref<!fir.box<!fir.ptr<!fir.array<?x?xf32>>>>
+ %11:3 = fir.box_dims %10, %c0 : (!fir.box<!fir.ptr<!fir.array<?x?xf32>>>, index) -> (index, index, index)
+ %12 = arith.addi %11#0, %11#1 : index
+ %13 = arith.subi %12, %c1 : index
+ %14 = arith.subi %13, %11#0 : index
+ %15 = arith.addi %14, %c1 : index
+ %16 = arith.cmpi sgt, %15, %c0 : index
+ %17 = arith.select %16, %15, %c0 : index
+ %18 = fir.load %4#0 : !fir.ref<i32>
+ %19 = fir.convert %18 : (i32) -> i64
+ %20 = fir.shape %17 : (index) -> !fir.shape<1>
+ %21 = hlfir.designate %10 (%11#0:%13:%c1, %19) shape %20 : (!fir.box<!fir.ptr<!fir.array<?x?xf32>>>, index, index, index, i64, !fir.shape<1>) -> !fir.box<!fir.array<?xf32>>
+ hlfir.assign %8 to %21 : !hlfir.expr<3xf32>, !fir.box<!fir.array<?xf32>>
+ hlfir.destroy %8 : !hlfir.expr<3xf32>
+ return
+}
|
jeanPerier
left a comment
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Makes sense to me, thanks Slava!
tblah
left a comment
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Looks great
…lvm#143045) If there is a read-effect operation inside `hlfir.elemental`, there is no reason to block moving it to the assignment point unless there are write-effect operations between the elemental and the assignment. The previous code was disallowing the optimization even if there were only read-effect operations in between. This case is from 465.tonto, though this change does not improve performance at all.
If there is a read-effect operation inside
hlfir.elemental,there is no reason to block moving it to the assignment point
unless there are write-effect operations between the elemental
and the assignment. The previous code was disallowing the optimization
even if there were only read-effect operations in between.
This case is from 465.tonto, though this change does not improve
performance at all.