From 8b74b2bf1459477bb00fd7a82bbce4c89dd21090 Mon Sep 17 00:00:00 2001
From: Nirvedh
Date: Mon, 31 Mar 2025 16:50:19 -0500
Subject: [PATCH 1/4] [mlir] Vectorize tensor.pad with low padding for unit dims

Signed-off-by: Nirvedh
---
 .../Linalg/Transforms/Vectorization.cpp       | 11 +++--
 .../Linalg/vectorization-unsupported.mlir     | 24 +++++++++++
 mlir/test/Dialect/Linalg/vectorization.mlir   | 40 +++++++++++++++++++
 3 files changed, 71 insertions(+), 4 deletions(-)

diff --git a/mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp b/mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp
index 2dcd897330d1e..4adacbe7d45ca 100644
--- a/mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp
+++ b/mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp
@@ -2178,11 +2178,14 @@ vectorizePadOpPrecondition(tensor::PadOp padOp,
                                       inputVectorSizes)))
     return failure();
 
-  if (llvm::any_of(padOp.getLow(), [](Value v) {
-        std::optional<int64_t> res = getConstantIntValue(v);
-        return !res.has_value() || res.value() != 0;
+  if (llvm::any_of(llvm::enumerate(padOp.getLow()), [&](const auto &en) {
+        Value padValue = en.value();
+        unsigned pos = en.index();
+        std::optional<int64_t> res = getConstantIntValue(padValue);
+        return (!res.has_value() || res.value() != 0) &&
+               resultTensorShape[pos] != 1;
       })) {
-    LDBG("low pad must all be zero: " << padOp << "\n");
+    LDBG("low pad must all be zero for all non unit dims: " << padOp << "\n");
     return failure();
   }
 
diff --git a/mlir/test/Dialect/Linalg/vectorization-unsupported.mlir b/mlir/test/Dialect/Linalg/vectorization-unsupported.mlir
index 2d1f0191eb798..f419d81d8df2b 100644
--- a/mlir/test/Dialect/Linalg/vectorization-unsupported.mlir
+++ b/mlir/test/Dialect/Linalg/vectorization-unsupported.mlir
@@ -305,6 +305,30 @@ module attributes {transform.with_named_sequence} {
 
 // -----
 
+func.func @test_masked_vectorize_lowpad(
+  %0 : tensor<?x?xf32>, %h0 : index, %h1 : index, %l0 : index)
+  -> tensor<2x4xf32> {
+  // expected-error @+3 {{Attempted to vectorize, but failed}}
+  %cst = arith.constant 42.43 : f32
+  %c0 = arith.constant 0 : index
+  %1 = tensor.pad %0 low[%l0, %c0] high[%h0, %h1] {
+    ^bb0(%hh1: index, %hh2: index):
+      tensor.yield %cst : f32
+  } : tensor<?x?xf32> to tensor<2x4xf32>
+  return %1: tensor<2x4xf32>
+}
+
+module attributes {transform.with_named_sequence} {
+  transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) {
+    %0 = transform.structured.match ops{["tensor.pad"]} in %arg1
+      : (!transform.any_op) -> !transform.any_op
+    transform.structured.vectorize %0 vector_sizes [2, 4] : !transform.any_op
+    transform.yield
+  }
+}
+
+// -----
+
 // With dynamically shaped source, the vectorizer infers the vector size for
 // xfer Ops from the destination tensor and, conservatively, assumes
 // out-of-bounds accesses. Out-of-bounds accesses require a pad value, but
diff --git a/mlir/test/Dialect/Linalg/vectorization.mlir b/mlir/test/Dialect/Linalg/vectorization.mlir
index c6d9ec6215715..efd752e70df03 100644
--- a/mlir/test/Dialect/Linalg/vectorization.mlir
+++ b/mlir/test/Dialect/Linalg/vectorization.mlir
@@ -666,6 +666,46 @@ module attributes {transform.with_named_sequence} {
 
 // -----
 
+// CHECK-LABEL: func @test_masked_vectorize_unit_lowpad
+func.func @test_masked_vectorize_unit_lowpad(
+  %0 : tensor<?x?xf32>, %h0 : index, %h1 : index, %l0 : index)
+  -> tensor<1x4xf32>
+{
+  // CHECK-DAG: %[[c42:.*]] = arith.constant 4.243000e+01 : f32
+  // CHECK-DAG: %[[c0:.*]] = arith.constant 0 : index
+  // CHECK: %[[c0_1:.*]] = arith.constant 0 : index
+  // CHECK-DAG: %[[d0:.*]] = tensor.dim {{.*}} : tensor<?x?xf32>
+  // CHECK-DAG: %[[d1:.*]] = tensor.dim {{.*}} : tensor<?x?xf32>
+  // CHECK: %[[mask:.*]] = vector.create_mask %[[d0]], %[[d1]] : vector<1x4xi1>
+  // CHECK: %[[masked_read:.*]] = vector.mask %[[mask]] {
+  // CHECK-SAME: vector.transfer_read %{{.*}}[%[[c0_1]], %[[c0_1]]], %[[c42]]
+  // CHECK-SAME: {in_bounds = [true, true]} : tensor<?x?xf32>, vector<1x4xf32>
+  // CHECK-SAME: } : vector<1x4xi1> -> vector<1x4xf32>
+  // CHECK-DAG: %[[empty:.*]] = tensor.empty() : tensor<1x4xf32>
+  // CHECK-DAG: %[[c0_2:.*]] = arith.constant 0 : index
+  // CHECK: %[[masked_write:.*]] = vector.transfer_write %[[masked_read]], %[[empty]][%[[c0_2]], %[[c0_2]]]
+  // CHECK-SAME: {in_bounds = [true, true]} : vector<1x4xf32>, tensor<1x4xf32>
+  // CHECK: return %[[masked_write]] : tensor<1x4xf32>
+  %cst = arith.constant 42.43 : f32
+  %c0 = arith.constant 0 : index
+  %1 = tensor.pad %0 low[%l0, %c0] high[%h0, %h1] {
+    ^bb0(%hh1: index, %hh2: index):
+      tensor.yield %cst : f32
+  } : tensor<?x?xf32> to tensor<1x4xf32>
+  return %1: tensor<1x4xf32>
+}
+
+module attributes {transform.with_named_sequence} {
+  transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) {
+    %0 = transform.structured.match ops{["tensor.pad"]} in %arg1
+      : (!transform.any_op) -> !transform.any_op
+    transform.structured.vectorize %0 vector_sizes [1, 4] : !transform.any_op
+    transform.yield
+  }
+}
+
+// -----
+
 // Input identical as the test in vectorization-with-patterns.mlir. Output is
 // different - vector sizes are inferred (rather than user-specified) and hence
 // masking was used.

From 81a9fc9ab546b9495b823390a19f15a88d90e6db Mon Sep 17 00:00:00 2001
From: Nirvedh
Date: Tue, 1 Apr 2025 11:40:22 -0500
Subject: [PATCH 2/4] address reviewer comments

Signed-off-by: Nirvedh
---
 .../Linalg/Transforms/Vectorization.cpp      | 11 ++++++++
 .../Linalg/vectorization-unsupported.mlir    |  3 ++
 mlir/test/Dialect/Linalg/vectorization.mlir  | 28 ++++++++++---------
 3 files changed, 29 insertions(+), 13 deletions(-)

diff --git a/mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp b/mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp
index 4adacbe7d45ca..dcab00f5603e3 100644
--- a/mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp
+++ b/mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp
@@ -2178,6 +2178,17 @@ vectorizePadOpPrecondition(tensor::PadOp padOp,
                                       inputVectorSizes)))
     return failure();
 
+  // Low padding is currently unsupported for the general case as this
+  // would require shifting the results to the right for the low padded dims by
+  // the required amount of low padding. However, we do support low padding if
+  // the dims being low padded have result sizes of 1. The reason is when we
+  // have a low pad on a unit result dim, the input size of that dimension will
+  // be dynamically zero (as the sum of the low pad and input dim size has to be
+  // one) and hence we will create a zero mask as the lowering logic just makes
+  // the mask one for the input dim size - which is zero here. Hence we will
+  // load the pad value which is what we want in this case. If the low pad is
+  // dynamically zero then the lowering is correct as well as no shifts are
+  // necessary.
   if (llvm::any_of(llvm::enumerate(padOp.getLow()), [&](const auto &en) {
         Value padValue = en.value();
         unsigned pos = en.index();
diff --git a/mlir/test/Dialect/Linalg/vectorization-unsupported.mlir b/mlir/test/Dialect/Linalg/vectorization-unsupported.mlir
index f419d81d8df2b..0b9a867436856 100644
--- a/mlir/test/Dialect/Linalg/vectorization-unsupported.mlir
+++ b/mlir/test/Dialect/Linalg/vectorization-unsupported.mlir
@@ -305,6 +305,9 @@ module attributes {transform.with_named_sequence} {
 
 // -----
 
+// Low padding is unsupported with the exception of the special case if the
+// result size of the padded dimension is unit. Here `%l0` being a non-zero
+// padding applied to a non-unit dimenension makes this case unsupported.
 func.func @test_masked_vectorize_lowpad(
   %0 : tensor<?x?xf32>, %h0 : index, %h1 : index, %l0 : index)
   -> tensor<2x4xf32> {
diff --git a/mlir/test/Dialect/Linalg/vectorization.mlir b/mlir/test/Dialect/Linalg/vectorization.mlir
index efd752e70df03..fdcbb01801a64 100644
--- a/mlir/test/Dialect/Linalg/vectorization.mlir
+++ b/mlir/test/Dialect/Linalg/vectorization.mlir
@@ -665,27 +665,29 @@ module attributes {transform.with_named_sequence} {
 }
 
 // -----
-
+// This case is supported because low padding `%l0` is applied on
+// a unit dimentsion which is supported, non unit result dimension low
+// padding is currently unsupported.
 // CHECK-LABEL: func @test_masked_vectorize_unit_lowpad
 func.func @test_masked_vectorize_unit_lowpad(
   %0 : tensor<?x?xf32>, %h0 : index, %h1 : index, %l0 : index)
   -> tensor<1x4xf32>
 {
-  // CHECK-DAG: %[[c42:.*]] = arith.constant 4.243000e+01 : f32
-  // CHECK-DAG: %[[c0:.*]] = arith.constant 0 : index
-  // CHECK: %[[c0_1:.*]] = arith.constant 0 : index
-  // CHECK-DAG: %[[d0:.*]] = tensor.dim {{.*}} : tensor<?x?xf32>
-  // CHECK-DAG: %[[d1:.*]] = tensor.dim {{.*}} : tensor<?x?xf32>
-  // CHECK: %[[mask:.*]] = vector.create_mask %[[d0]], %[[d1]] : vector<1x4xi1>
-  // CHECK: %[[masked_read:.*]] = vector.mask %[[mask]] {
-  // CHECK-SAME: vector.transfer_read %{{.*}}[%[[c0_1]], %[[c0_1]]], %[[c42]]
+  // CHECK-DAG: %[[C42:.*]] = arith.constant 4.243000e+01 : f32
+  // CHECK-DAG: %[[C0:.*]] = arith.constant 0 : index
+  // CHECK: %[[C0_1:.*]] = arith.constant 0 : index
+  // CHECK-DAG: %[[D0:.*]] = tensor.dim {{.*}} : tensor<?x?xf32>
+  // CHECK-DAG: %[[D1:.*]] = tensor.dim {{.*}} : tensor<?x?xf32>
+  // CHECK: %[[MASK:.*]] = vector.create_mask %[[D0]], %[[D1]] : vector<1x4xi1>
+  // CHECK: %[[MASKED_READ:.*]] = vector.mask %[[MASK]] {
+  // CHECK-SAME: vector.transfer_read %{{.*}}[%[[C0_1]], %[[C0_1]]], %[[C42]]
   // CHECK-SAME: {in_bounds = [true, true]} : tensor<?x?xf32>, vector<1x4xf32>
   // CHECK-SAME: } : vector<1x4xi1> -> vector<1x4xf32>
-  // CHECK-DAG: %[[empty:.*]] = tensor.empty() : tensor<1x4xf32>
-  // CHECK-DAG: %[[c0_2:.*]] = arith.constant 0 : index
-  // CHECK: %[[masked_write:.*]] = vector.transfer_write %[[masked_read]], %[[empty]][%[[c0_2]], %[[c0_2]]]
+  // CHECK-DAG: %[[EMPTY:.*]] = tensor.empty() : tensor<1x4xf32>
+  // CHECK-DAG: %[[C0_2:.*]] = arith.constant 0 : index
+  // CHECK: %[[MASKED_WRITE:.*]] = vector.transfer_write %[[MASKED_READ]], %[[EMPTY]][%[[C0_2]], %[[C0_2]]]
   // CHECK-SAME: {in_bounds = [true, true]} : vector<1x4xf32>, tensor<1x4xf32>
-  // CHECK: return %[[masked_write]] : tensor<1x4xf32>
+  // CHECK: return %[[MASKED_WRITE]] : tensor<1x4xf32>
   %cst = arith.constant 42.43 : f32
   %c0 = arith.constant 0 : index
   %1 = tensor.pad %0 low[%l0, %c0] high[%h0, %h1] {

From 1465643dfc2c8f66a9845e76ad06828ff03e7209 Mon Sep 17 00:00:00 2001
From: Nirvedh
Date: Tue, 1 Apr 2025 11:48:23 -0500
Subject: [PATCH 3/4] typos

---
 mlir/test/Dialect/Linalg/vectorization-unsupported.mlir | 2 +-
 mlir/test/Dialect/Linalg/vectorization.mlir             | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/mlir/test/Dialect/Linalg/vectorization-unsupported.mlir b/mlir/test/Dialect/Linalg/vectorization-unsupported.mlir
index 0b9a867436856..9410d8b556ca5 100644
--- a/mlir/test/Dialect/Linalg/vectorization-unsupported.mlir
+++ b/mlir/test/Dialect/Linalg/vectorization-unsupported.mlir
@@ -307,7 +307,7 @@ module attributes {transform.with_named_sequence} {
 
 // Low padding is unsupported with the exception of the special case if the
 // result size of the padded dimension is unit. Here `%l0` being a non-zero
-// padding applied to a non-unit dimenension makes this case unsupported.
+// padding applied to a non-unit result dimension makes this case unsupported.
 func.func @test_masked_vectorize_lowpad(
   %0 : tensor<?x?xf32>, %h0 : index, %h1 : index, %l0 : index)
   -> tensor<2x4xf32> {
diff --git a/mlir/test/Dialect/Linalg/vectorization.mlir b/mlir/test/Dialect/Linalg/vectorization.mlir
index fdcbb01801a64..79b4d13ef16a8 100644
--- a/mlir/test/Dialect/Linalg/vectorization.mlir
+++ b/mlir/test/Dialect/Linalg/vectorization.mlir
@@ -666,7 +666,7 @@ module attributes {transform.with_named_sequence} {
 
 // -----
 // This case is supported because low padding `%l0` is applied on
-// a unit dimentsion which is supported, non unit result dimension low
+// a unit dimension which is supported, non unit result dimension low
 // padding is currently unsupported.
 // CHECK-LABEL: func @test_masked_vectorize_unit_lowpad
 func.func @test_masked_vectorize_unit_lowpad(

From 20551ddb48928ee927c623985c62d53f4f12d4e8 Mon Sep 17 00:00:00 2001
From: Nirvedh
Date: Wed, 2 Apr 2025 11:32:36 -0500
Subject: [PATCH 4/4] reviewer comments 2

Signed-off-by: Nirvedh
---
 .../Linalg/Transforms/Vectorization.cpp      | 26 +++++++++----------
 .../Linalg/vectorization-unsupported.mlir    |  8 +++---
 mlir/test/Dialect/Linalg/vectorization.mlir  |  4 +--
 3 files changed, 19 insertions(+), 19 deletions(-)

diff --git a/mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp b/mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp
index dcab00f5603e3..8c8b1b85ef5a3 100644
--- a/mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp
+++ b/mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp
@@ -2178,22 +2178,22 @@ vectorizePadOpPrecondition(tensor::PadOp padOp,
                                       inputVectorSizes)))
     return failure();
 
-  // Low padding is currently unsupported for the general case as this
-  // would require shifting the results to the right for the low padded dims by
-  // the required amount of low padding. However, we do support low padding if
-  // the dims being low padded have result sizes of 1. The reason is when we
-  // have a low pad on a unit result dim, the input size of that dimension will
-  // be dynamically zero (as the sum of the low pad and input dim size has to be
-  // one) and hence we will create a zero mask as the lowering logic just makes
-  // the mask one for the input dim size - which is zero here. Hence we will
-  // load the pad value which is what we want in this case. If the low pad is
-  // dynamically zero then the lowering is correct as well as no shifts are
-  // necessary.
+  // Padding with non-zero low pad values is not supported, unless the
+  // corresponding result dim is 1 as this would require shifting the results to
+  // the right for the low padded dims by the required amount of low padding.
+  // However, we do support low padding if the dims being low padded have result
+  // sizes of 1. The reason is when we have a low pad on a unit result dim, the
+  // input size of that dimension will be dynamically zero (as the sum of the
+  // low pad and input dim size has to be one) and hence we will create a zero
+  // mask as the lowering logic just makes the mask one for the input dim size -
+  // which is zero here. Hence we will load the pad value which is what we want
+  // in this case. If the low pad is dynamically zero then the lowering is
+  // correct as well as no shifts are necessary.
   if (llvm::any_of(llvm::enumerate(padOp.getLow()), [&](const auto &en) {
         Value padValue = en.value();
         unsigned pos = en.index();
-        std::optional<int64_t> res = getConstantIntValue(padValue);
-        return (!res.has_value() || res.value() != 0) &&
+        std::optional<int64_t> pad = getConstantIntValue(padValue);
+        return (!pad.has_value() || pad.value() != 0) &&
                resultTensorShape[pos] != 1;
       })) {
     LDBG("low pad must all be zero for all non unit dims: " << padOp << "\n");
     return failure();
   }
diff --git a/mlir/test/Dialect/Linalg/vectorization-unsupported.mlir b/mlir/test/Dialect/Linalg/vectorization-unsupported.mlir
index 9410d8b556ca5..f653a4852b074 100644
--- a/mlir/test/Dialect/Linalg/vectorization-unsupported.mlir
+++ b/mlir/test/Dialect/Linalg/vectorization-unsupported.mlir
@@ -305,10 +305,10 @@ module attributes {transform.with_named_sequence} {
 
 // -----
 
-// Low padding is unsupported with the exception of the special case if the
-// result size of the padded dimension is unit. Here `%l0` being a non-zero
-// padding applied to a non-unit result dimension makes this case unsupported.
-func.func @test_masked_vectorize_lowpad(
+// Padding with non-zero low pad values is not supported, unless the corresponding
+// result dim is 1. Here `%l0` being a non-zero low pad applied to a
+// non-unit result dimension makes this case unsupported.
+func.func @tensor_pad_non_zero_low_pad(
   %0 : tensor<?x?xf32>, %h0 : index, %h1 : index, %l0 : index)
   -> tensor<2x4xf32> {
   // expected-error @+3 {{Attempted to vectorize, but failed}}
diff --git a/mlir/test/Dialect/Linalg/vectorization.mlir b/mlir/test/Dialect/Linalg/vectorization.mlir
index 79b4d13ef16a8..299be1296aa66 100644
--- a/mlir/test/Dialect/Linalg/vectorization.mlir
+++ b/mlir/test/Dialect/Linalg/vectorization.mlir
@@ -668,8 +668,8 @@ module attributes {transform.with_named_sequence} {
 // This case is supported because low padding `%l0` is applied on
 // a unit dimension which is supported, non unit result dimension low
 // padding is currently unsupported.
-// CHECK-LABEL: func @test_masked_vectorize_unit_lowpad
-func.func @test_masked_vectorize_unit_lowpad(
+// CHECK-LABEL: func @test_masked_vectorize_non_zero_low_pad_unit_res_dim
+func.func @test_masked_vectorize_non_zero_low_pad_unit_res_dim(
   %0 : tensor<?x?xf32>, %h0 : index, %h1 : index, %l0 : index)
   -> tensor<1x4xf32>
 {
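Below is a minimal standalone MLIR sketch, not part of the patch series, of why the special case accepted by the new precondition is safe. The function name and the static 0-extent source shape are hypothetical, chosen only to make the shapes concrete: with a low pad of 1 on a result dim of size 1, the source extent along that dim is forced to 0 (low + source + high = 1), so the mask the vectorizer builds from the source dims is all-false along that dim and the masked transfer_read yields only the pad value, which is exactly the padded result.

func.func @unit_dim_low_pad(%src : tensor<0x4xf32>) -> tensor<1x4xf32> {
  // The pad value. With a source extent of 0 along dim 0, a mask built from
  // the source dims is all-false, so a masked vector.transfer_read of %src
  // returns this value in every lane.
  %pad = arith.constant 42.43 : f32
  // Result dim 0 = low (1) + source (0) + high (0) = 1: a non-zero low pad
  // on a unit result dim, i.e. the case the precondition now allows.
  %res = tensor.pad %src low[1, 0] high[0, 0] {
  ^bb0(%i : index, %j : index):
    tensor.yield %pad : f32
  } : tensor<0x4xf32> to tensor<1x4xf32>
  return %res : tensor<1x4xf32>
}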