[mlir][bufferization]-Try to move the needed values for subsetExtract in EmptyTensorElimination

amirBish · amirBish · commit 3054b21e256f · 2024-12-06T14:07:20.000+02:00
In this MR, we handle the case where we may fail to find
a legal/suitable insertion point for the subsetExtract which
is about to replace the empty tensor.

For this reason, we now also try to move the needed values,
which are required to create the `subsetExtract`, before
the candidate insertion point (the tensor.empty about to be
eliminated).
diff --git a/mlir/lib/Dialect/Bufferization/Transforms/EmptyTensorElimination.cpp b/mlir/lib/Dialect/Bufferization/Transforms/EmptyTensorElimination.cpp
@@ -28,23 +28,32 @@ namespace bufferization {
 using namespace mlir;
 using namespace mlir::bufferization;
 
+/// Return true if `val` is in scope at the given
+/// `insertionPoint`.
+static bool valueDominateInsertionPoint(const DominanceInfo &domInfo,
+                                        Operation *insertionPoint, Value val) {
+  if (auto bbArg = dyn_cast<BlockArgument>(val)) {
+    Block *owner = bbArg.getOwner();
+    if (!owner->findAncestorOpInBlock(*insertionPoint))
+      return false;
+  } else {
+    auto opResult = cast<OpResult>(val);
+    if (!domInfo.properlyDominates(opResult.getOwner(), insertionPoint))
+      return false;
+  }
+  return true;
+}
+
 /// Return true if all `neededValues` are in scope at the given
 /// `insertionPoint`.
 static bool
 neededValuesDominateInsertionPoint(const DominanceInfo &domInfo,
                                    Operation *insertionPoint,
                                    const SmallVector<Value> &neededValues) {
-  for (Value val : neededValues) {
-    if (auto bbArg = dyn_cast<BlockArgument>(val)) {
-      Block *owner = bbArg.getOwner();
-      if (!owner->findAncestorOpInBlock(*insertionPoint))
-        return false;
-    } else {
-      auto opResult = cast<OpResult>(val);
-      if (!domInfo.properlyDominates(opResult.getOwner(), insertionPoint))
-        return false;
-    }
-  }
+  for (Value val : neededValues)
+    if (!valueDominateInsertionPoint(domInfo, insertionPoint, val))
+      return false;
+
   return true;
 }
 
@@ -65,6 +74,23 @@ findValidInsertionPoint(Operation *emptyTensorOp,
                         const SmallVector<Value> &neededValues) {
   DominanceInfo domInfo;
 
+  // Trying to move the needed values before the `emptyTensorOp`.
+  for (Value val : neededValues) {
+    if (valueDominateInsertionPoint(domInfo, emptyTensorOp, val))
+      continue;
+    Operation *definingOp = val.getDefiningOp();
+    if (!definingOp)
+      continue;
+
+    bool isItSafeToMoveOp =
+        llvm::all_of(definingOp->getOperands(), [&](Value operand) {
+          return valueDominateInsertionPoint(domInfo, emptyTensorOp, operand);
+        });
+
+    if (isItSafeToMoveOp)
+      definingOp->moveBefore(emptyTensorOp);
+  }
+
   // Gather all possible insertion points: the location of `emptyTensorOp` and
   // right after the definition of each value in `neededValues`.
   SmallVector<Operation *> insertionPointCandidates;
diff --git a/mlir/test/Dialect/Bufferization/Transforms/one-shot-bufferize-empty-tensor-elimination.mlir b/mlir/test/Dialect/Bufferization/Transforms/one-shot-bufferize-empty-tensor-elimination.mlir
@@ -368,21 +368,21 @@ func.func @multiple_materialize_in_destination_buffer(%m: memref<5xf32>, %f: f32
 
 // -----
 
-// `EmptyTensorElimination` fails to find a valid insertion
-// point for the new injected `SubsetExtraction`.
+// `EmptyTensorElimination` finds a valid insertion
+// point for the new injected `SubsetExtraction` by
+// trying to move the needed value for the extraction.
 // CHECK-LABEL:   func.func @fail_to_eliminate_any_empty_tensors
 func.func @fail_to_eliminate_any_empty_tensors() -> tensor<5x6x128xf32> {
   %cst_1 = arith.constant 1.0 : f32
   %cst_2 = arith.constant 2.0 : f32
   // CHECK: memref.alloc
-  // CHECK: memref.alloc
-  // CHECK: memref.alloc
+  // CHECK-NOT: memref.alloc
   %empty_1 = tensor.empty() : tensor<5x6x64xf32>
   %res_1 = linalg.fill ins(%cst_1 : f32) outs(%empty_1 : tensor<5x6x64xf32>) -> tensor<5x6x64xf32>
   %empty_2 = tensor.empty() : tensor<5x6x64xf32>
   %res_2 = linalg.fill ins(%cst_2 : f32) outs(%empty_2 : tensor<5x6x64xf32>) -> tensor<5x6x64xf32>
   %cancatenated_empty = tensor.empty() : tensor<5x6x128xf32>
-  // CHECK: memref.copy
+  // CHECK-NOT: memref.copy
   %inserted_slice_1 = tensor.insert_slice %res_1 into %cancatenated_empty[0, 0, 0][5, 6, 64][1, 1, 1]
       : tensor<5x6x64xf32> into tensor<5x6x128xf32>
   %inserted_slice_2 = tensor.insert_slice %res_2 into %inserted_slice_1[0, 0, 64][5, 6, 64][1, 1, 1]
@@ -397,13 +397,13 @@ func.func @succeed_to_eliminate_one_empty_tensor() -> tensor<5x6x128xf32> {
   %cst_1 = arith.constant 1.0 : f32
   %cst_2 = arith.constant 2.0 : f32
   // CHECK: memref.alloc
-  // CHECK: memref.alloc
+  // CHECK-NOT: memref.alloc
   %cancatenated_empty = tensor.empty() : tensor<5x6x128xf32>
   %empty_1 = tensor.empty() : tensor<5x6x64xf32>
   %res_1 = linalg.fill ins(%cst_1 : f32) outs(%empty_1 : tensor<5x6x64xf32>) -> tensor<5x6x64xf32>
   %empty_2 = tensor.empty() : tensor<5x6x64xf32>
   %res_2 = linalg.fill ins(%cst_2 : f32) outs(%empty_2 : tensor<5x6x64xf32>) -> tensor<5x6x64xf32>
-  // CHECK: memref.copy
+  // CHECK-NOT: memref.copy
   %inserted_slice_1 = tensor.insert_slice %res_1 into %cancatenated_empty[0, 0, 0][5, 6, 64][1, 1, 1]
       : tensor<5x6x64xf32> into tensor<5x6x128xf32>
   %inserted_slice_2 = tensor.insert_slice %res_2 into %inserted_slice_1[0, 0, 64][5, 6, 64][1, 1, 1]