Skip to content

Commit fa399f7

Browse files
sakupan102 and github-actions[bot]
authored and committed
Automerge: [MLIR] [Vector] Fix canonicalization for vector.scatter with tensor output (#168824)
Commit llvm/llvm-project@7e7ea9c added tensor support for scatter, but running the existing canonicalization on tensors causes bugs, so we fix the canonicalization with tensor output. Closes llvm/llvm-project#168695 --------- Signed-off-by: Ryutaro Okada <[email protected]>
2 parents 94e2ac4 + 04b1975 commit fa399f7

File tree

2 files changed

+64
-1
lines changed

2 files changed

+64
-1
lines changed

mlir/lib/Dialect/Vector/IR/VectorOps.cpp

Lines changed: 17 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6100,11 +6100,22 @@ class ScatterFolder final : public OpRewritePattern<ScatterOp> {
61006100
using Base::Base;
61016101
LogicalResult matchAndRewrite(ScatterOp scatter,
61026102
PatternRewriter &rewriter) const override {
6103+
ShapedType baseType = scatter.getBaseType();
6104+
bool isMemRef = isa<MemRefType>(baseType);
6105+
if (!isMemRef && !isa<RankedTensorType>(baseType))
6106+
return failure();
6107+
6108+
// Memrefs have no result, so an all-false mask can simply erase the op.
6109+
// Tensors carry the updated value, so we must replace uses with the
6110+
// original base tensor instead of erasing.
61036111
switch (getMaskFormat(scatter.getMask())) {
61046112
case MaskFormat::AllTrue:
61056113
return failure(); // no unmasked equivalent
61066114
case MaskFormat::AllFalse:
6107-
rewriter.eraseOp(scatter);
6115+
if (isMemRef)
6116+
rewriter.eraseOp(scatter);
6117+
else
6118+
rewriter.replaceOp(scatter, scatter.getBase());
61086119
return success();
61096120
case MaskFormat::Unknown:
61106121
return failure();
@@ -6120,6 +6131,11 @@ class FoldContiguousScatter final : public OpRewritePattern<ScatterOp> {
61206131
using Base::Base;
61216132
LogicalResult matchAndRewrite(ScatterOp op,
61226133
PatternRewriter &rewriter) const override {
6134+
// Fold only for memrefs: the replacement uses maskedstore, which does not
6135+
// support tensor bases. Tensor cases intentionally bail out.
6136+
if (!isa<MemRefType>(op.getBase().getType()))
6137+
return failure();
6138+
61236139
if (failed(isZeroBasedContiguousSeq(op.getIndices())))
61246140
return failure();
61256141

mlir/test/Dialect/Vector/canonicalize.mlir

Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3928,6 +3928,53 @@ func.func @contiguous_scatter_step(%base: memref<?xf32>,
39283928

39293929
// -----
39303930

3931+
// No canonicalization should happen here as the base is a tensor.
3932+
// CHECK-LABEL: @no_fold_contiguous_scatter_tensor
3933+
// CHECK-NOT: vector.maskedstore
3934+
// CHECK: %[[RES:.*]] = vector.scatter
3935+
// CHECK: return %[[RES]]
3936+
func.func @no_fold_contiguous_scatter_tensor(%base: tensor<16xf32>,
3937+
%mask: vector<16xi1>,
3938+
%value: vector<16xf32>) -> tensor<16xf32> {
3939+
%c0 = arith.constant 0 : index
3940+
%indices = arith.constant dense<[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]> : vector<16xi32>
3941+
%0 = vector.scatter %base[%c0] [%indices], %mask, %value
3942+
: tensor<16xf32>, vector<16xi32>, vector<16xi1>, vector<16xf32> -> tensor<16xf32>
3943+
return %0 : tensor<16xf32>
3944+
}
3945+
3946+
// -----
3947+
3948+
// CHECK-LABEL: @scatter_memref_all_false
3949+
// CHECK-SAME: (%[[BASE:.*]]: memref<?xf32>, %[[INDEX:.*]]: vector<16xindex>, %[[VALUE:.*]]: vector<16xf32>)
3950+
// CHECK-NEXT: return
3951+
func.func @scatter_memref_all_false(%base: memref<?xf32>,
3952+
%index: vector<16xindex>,
3953+
%value: vector<16xf32>) {
3954+
%c0 = arith.constant 0 : index
3955+
%mask = arith.constant dense<false> : vector<16xi1>
3956+
vector.scatter %base[%c0][%index], %mask, %value
3957+
: memref<?xf32>, vector<16xindex>, vector<16xi1>, vector<16xf32>
3958+
return
3959+
}
3960+
3961+
// -----
3962+
3963+
// CHECK-LABEL: @scatter_tensor_all_false
3964+
// CHECK-SAME: (%[[BASE:.*]]: tensor<16xf32>, %[[INDEX:.*]]: vector<16xindex>, %[[VALUE:.*]]: vector<16xf32>) -> tensor<16xf32> {
3965+
// CHECK: return %[[BASE]] : tensor<16xf32>
3966+
func.func @scatter_tensor_all_false(%base: tensor<16xf32>,
3967+
%index: vector<16xindex>,
3968+
%value: vector<16xf32>) -> tensor<16xf32> {
3969+
%c0 = arith.constant 0 : index
3970+
%mask = arith.constant dense<false> : vector<16xi1>
3971+
%0 = vector.scatter %base[%c0][%index], %mask, %value
3972+
: tensor<16xf32>, vector<16xindex>, vector<16xi1>, vector<16xf32> -> tensor<16xf32>
3973+
return %0 : tensor<16xf32>
3974+
}
3975+
3976+
// -----
3977+
39313978
// CHECK-LABEL: @fold_extract_constant_indices
39323979
// CHECK-SAME: %[[ARG:.*]]: vector<32x1xi32>) -> i32 {
39333980
// CHECK: %[[RES:.*]] = vector.extract %[[ARG]][0, 0] : i32 from vector<32x1xi32>

0 commit comments

Comments
 (0)