Implement the InsertInsertToInsert pattern as a folder, drop the single-use (hasOneUse) check, and update tests.

linuxlonelyeagle · linuxlonelyeagle · commit ddd3df95de72 · 2025-07-04T15:57:45.000Z
diff --git a/mlir/lib/Dialect/Vector/IR/VectorOps.cpp b/mlir/lib/Dialect/Vector/IR/VectorOps.cpp
@@ -3334,29 +3334,6 @@ class InsertSplatToSplat final : public OpRewritePattern<InsertOp> {
     return success();
   }
 };
-
-/// Pattern to rewrite a InsertOp(InsertOp) to InsertOp.
-class InsertInsertToInsert final : public OpRewritePattern<InsertOp> {
-public:
-  using OpRewritePattern::OpRewritePattern;
-  LogicalResult matchAndRewrite(InsertOp op,
-                                PatternRewriter &rewriter) const override {
-    auto destInsert = op.getDest().getDefiningOp<InsertOp>();
-    if (!destInsert)
-      return failure();
-
-    if (!destInsert->hasOneUse())
-      return failure();
-
-    if (op.getMixedPosition() != destInsert.getMixedPosition())
-      return failure();
-
-    rewriter.replaceOpWithNewOp<InsertOp>(
-        op, op.getValueToStore(), destInsert.getDest(), op.getMixedPosition());
-    return success();
-  }
-};
-
 } // namespace
 
 static Attribute
@@ -3409,13 +3386,26 @@ foldDenseElementsAttrDestInsertOp(InsertOp insertOp, Attribute srcAttr,
   return newAttr;
 }
 
+/// Folds insert(v, insert(_, d)[pos])[pos] in place by rewiring `dest` to `d`;
+/// returns the op's own result if it was updated, or a null Value otherwise.
+static Value foldInsertUseChain(InsertOp insertOp) {
+  auto destInsert = insertOp.getDest().getDefiningOp<InsertOp>();
+  if (!destInsert)
+    return {};
+
+  if (insertOp.getMixedPosition() != destInsert.getMixedPosition())
+    return {};
+
+  insertOp.getDestMutable().assign(destInsert.getDest());
+  return insertOp.getResult();
+}
+
 void InsertOp::getCanonicalizationPatterns(RewritePatternSet &results,
                                            MLIRContext *context) {
-  results.add<InsertToBroadcast, BroadcastFolder, InsertSplatToSplat,
-              InsertInsertToInsert>(context);
+  results.add<InsertToBroadcast, BroadcastFolder, InsertSplatToSplat>(context);
 }
 
-OpFoldResult vector::InsertOp::fold(FoldAdaptor adaptor) {
+OpFoldResult InsertOp::fold(FoldAdaptor adaptor) {
   // Do not create constants with more than `vectorSizeFoldThreashold` elements,
   // unless the source vector constant has a single use.
   constexpr int64_t vectorSizeFoldThreshold = 256;
@@ -3430,6 +3420,8 @@ OpFoldResult vector::InsertOp::fold(FoldAdaptor adaptor) {
   SmallVector<Value> operands = {getValueToStore(), getDest()};
   auto inplaceFolded = extractInsertFoldConstantOp(*this, adaptor, operands);
 
+  if (auto res = foldInsertUseChain(*this))
+    return res;
   if (auto res = foldPoisonIndexInsertExtractOp(
           getContext(), adaptor.getStaticPosition(), kPoisonIndex))
     return res;
diff --git a/mlir/test/Dialect/Vector/canonicalize.mlir b/mlir/test/Dialect/Vector/canonicalize.mlir
@@ -3449,14 +3449,65 @@ func.func @fold_insert_constant_indices(%arg : vector<4x1xi32>) -> vector<4x1xi3
 
 // -----
 
-// CHECK-LABEL: @insert_insert_to_insert(
+// CHECK-LABEL: @fold_insert_use_chain_static_pos(
 //  CHECK-SAME:   %[[ARG:.*]]: vector<4xf32>,
 //  CHECK-SAME:   %[[VAL:.*]]: f32) -> vector<4xf32> {
 //       CHECK:   %[[RES:.*]] = vector.insert %[[VAL]], %[[ARG]] [0] : f32 into vector<4xf32>
 //       CHECK:    return %[[RES]] : vector<4xf32>
-func.func @insert_insert_to_insert(%v : vector<4xf32>, %value : f32) -> vector<4xf32> {
+func.func @fold_insert_use_chain_static_pos(%v : vector<4xf32>, %value : f32) -> vector<4xf32> {
   %v_0 = vector.insert %value, %v[0] : f32 into vector<4xf32>
   %v_1 = vector.insert %value, %v_0[0] : f32 into vector<4xf32>
   %v_2 = vector.insert %value, %v_1[0] : f32 into vector<4xf32>
   return %v_2 : vector<4xf32>  
 }
+
+// -----
+
+// CHECK-LABEL: @fold_insert_use_chain_dynamic_pos(
+//  CHECK-SAME:   %[[ARG:.*]]: vector<4x4xf32>,
+//  CHECK-SAME:   %[[VAL:.*]]: f32,
+//  CHECK-SAME:   %[[POS:.*]]: index) -> vector<4x4xf32> {
+//       CHECK:   %[[RES:.*]] = vector.insert %[[VAL]], %[[ARG]] {{\[}}%[[POS]], 0] : f32 into vector<4x4xf32>
+//       CHECK:   return %[[RES]] : vector<4x4xf32>
+func.func @fold_insert_use_chain_dynamic_pos(%arg : vector<4x4xf32>, %value : f32, %pos : index) -> vector<4x4xf32> {
+  %v_0 = vector.insert %value, %arg[%pos, 0] : f32 into vector<4x4xf32>
+  %v_1 = vector.insert %value, %v_0[%pos, 0] : f32 into vector<4x4xf32>
+  %v_2 = vector.insert %value, %v_1[%pos, 0] : f32 into vector<4x4xf32>
+  return %v_2 : vector<4x4xf32>
+}
+
+// -----
+
+// CHECK-LABEL: @fold_insert_use_chain_add_float(
+//  CHECK-SAME:   %[[VEC_0:.*]]: vector<4xf32>,
+//  CHECK-SAME:   %[[VAL:.*]]: f32) -> vector<4xf32> {
+//       CHECK:   %[[VEC_1:.*]] = vector.insert %[[VAL]], %[[VEC_0]] [0] : f32 into vector<4xf32>
+//       CHECK:   %[[VEC_2:.*]] = arith.addf %[[VEC_1]], %[[VEC_1]] : vector<4xf32>
+//       CHECK:   %[[VEC_3:.*]] = vector.insert %[[VAL]], %[[VEC_0]] [0] : f32 into vector<4xf32>
+//       CHECK:   %[[VEC_4:.*]] = arith.addf %[[VEC_2]], %[[VEC_3]] : vector<4xf32>
+//       CHECK:   return %[[VEC_4]] : vector<4xf32>
+func.func @fold_insert_use_chain_add_float(%v : vector<4xf32>, %value : f32) -> vector<4xf32> {
+  %v_0 = vector.insert %value, %v[0] : f32 into vector<4xf32>
+  %v_1 = arith.addf %v_0, %v_0 : vector<4xf32>
+  %v_2 = vector.insert %value, %v_0[0] : f32 into vector<4xf32>
+  %v_3 = arith.addf %v_1, %v_2 : vector<4xf32>
+  return %v_3 : vector<4xf32>
+}
+
+// -----
+
+// CHECK-LABEL: @fold_insert_use_chain_add_float_pos_mismatch(
+//  CHECK-SAME:   %[[VEC_0:.*]]: vector<4xf32>,
+//  CHECK-SAME:   %[[VAL:.*]]: f32) -> vector<4xf32> {
+//       CHECK:   %[[VEC_1:.*]] = vector.insert %[[VAL]], %[[VEC_0]] [0] : f32 into vector<4xf32>
+//       CHECK:   %[[VEC_2:.*]] = arith.addf %[[VEC_1]], %[[VEC_1]] : vector<4xf32>
+//       CHECK:   %[[VEC_3:.*]] = vector.insert %[[VAL]], %[[VEC_1]] [1] : f32 into vector<4xf32>
+//       CHECK:   %[[VEC_4:.*]] = arith.addf %[[VEC_2]], %[[VEC_3]] : vector<4xf32>
+//       CHECK:   return %[[VEC_4]] : vector<4xf32>
+func.func @fold_insert_use_chain_add_float_pos_mismatch(%v : vector<4xf32>, %value : f32) -> vector<4xf32> {
+  %v_0 = vector.insert %value, %v[0] : f32 into vector<4xf32>
+  %v_1 = arith.addf %v_0, %v_0 : vector<4xf32>
+  %v_2 = vector.insert %value, %v_0[1] : f32 into vector<4xf32>
+  %v_3 = arith.addf %v_1, %v_2 : vector<4xf32>
+  return %v_3 : vector<4xf32>
+}