From 8b74b2bf1459477bb00fd7a82bbce4c89dd21090 Mon Sep 17 00:00:00 2001
From: Nirvedh
Date: Mon, 31 Mar 2025 16:50:19 -0500
Subject: [PATCH 1/4] [mlir] Vectorize tensor.pad with low padding for unit dims

Signed-off-by: Nirvedh
---
 .../Linalg/Transforms/Vectorization.cpp       | 11 +++--
 .../Linalg/vectorization-unsupported.mlir     | 24 +++++++++++
 mlir/test/Dialect/Linalg/vectorization.mlir   | 40 +++++++++++++++++++
 3 files changed, 71 insertions(+), 4 deletions(-)

diff --git a/mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp b/mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp
index 2dcd897330d1e..4adacbe7d45ca 100644
--- a/mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp
+++ b/mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp
@@ -2178,11 +2178,14 @@ vectorizePadOpPrecondition(tensor::PadOp padOp,
                                       inputVectorSizes)))
     return failure();
 
-  if (llvm::any_of(padOp.getLow(), [](Value v) {
-        std::optional<int64_t> res = getConstantIntValue(v);
-        return !res.has_value() || res.value() != 0;
+  if (llvm::any_of(llvm::enumerate(padOp.getLow()), [&](const auto &en) {
+        Value padValue = en.value();
+        unsigned pos = en.index();
+        std::optional<int64_t> res = getConstantIntValue(padValue);
+        return (!res.has_value() || res.value() != 0) &&
+               resultTensorShape[pos] != 1;
       })) {
-    LDBG("low pad must all be zero: " << padOp << "\n");
+    LDBG("low pad must all be zero for all non unit dims: " << padOp << "\n");
     return failure();
   }
 
diff --git a/mlir/test/Dialect/Linalg/vectorization-unsupported.mlir b/mlir/test/Dialect/Linalg/vectorization-unsupported.mlir
index 2d1f0191eb798..f419d81d8df2b 100644
--- a/mlir/test/Dialect/Linalg/vectorization-unsupported.mlir
+++ b/mlir/test/Dialect/Linalg/vectorization-unsupported.mlir
@@ -305,6 +305,30 @@ module attributes {transform.with_named_sequence} {
 
 // -----
 
+func.func @test_masked_vectorize_lowpad(
+  %0 : tensor<?x?xf32>, %h0 : index, %h1 : index, %l0 : index)
+  -> tensor<2x4xf32> {
+  // expected-error @+3 {{Attempted to vectorize, but failed}}
+  %cst = arith.constant 42.43 : f32
+  %c0 = arith.constant 0 : index
+  %1 = tensor.pad %0 low[%l0, %c0] high[%h0, %h1] {
+    ^bb0(%hh1: index, %hh2: index):
+      tensor.yield %cst : f32
+  } : tensor<?x?xf32> to tensor<2x4xf32>
+  return %1: tensor<2x4xf32>
+}
+
+module attributes {transform.with_named_sequence} {
+  transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) {
+    %0 = transform.structured.match ops{["tensor.pad"]} in %arg1
+      : (!transform.any_op) -> !transform.any_op
+    transform.structured.vectorize %0 vector_sizes [2, 4] : !transform.any_op
+    transform.yield
+  }
+}
+
+// -----
+
 // With dynamically shaped source, the vectorizer infers the vector size for
 // xfer Ops from the destination tensor and, conservatively, assumes
 // out-of-bounds accesses. Out-of-bounds accesses require a pad value, but
diff --git a/mlir/test/Dialect/Linalg/vectorization.mlir b/mlir/test/Dialect/Linalg/vectorization.mlir
index c6d9ec6215715..efd752e70df03 100644
--- a/mlir/test/Dialect/Linalg/vectorization.mlir
+++ b/mlir/test/Dialect/Linalg/vectorization.mlir
@@ -666,6 +666,46 @@ module attributes {transform.with_named_sequence} {
 
 // -----
 
+// CHECK-LABEL: func @test_masked_vectorize_unit_lowpad
+func.func @test_masked_vectorize_unit_lowpad(
+  %0 : tensor<?x?xf32>, %h0 : index, %h1 : index, %l0 : index)
+  -> tensor<1x4xf32>
+{
+  // CHECK-DAG: %[[c42:.*]] = arith.constant 4.243000e+01 : f32
+  // CHECK-DAG: %[[c0:.*]] = arith.constant 0 : index
+  // CHECK: %[[c0_1:.*]] = arith.constant 0 : index
+  // CHECK-DAG: %[[d0:.*]] = tensor.dim {{.*}} : tensor<?x?xf32>
+  // CHECK-DAG: %[[d1:.*]] = tensor.dim {{.*}} : tensor<?x?xf32>
+  // CHECK: %[[mask:.*]] = vector.create_mask %[[d0]], %[[d1]] : vector<1x4xi1>
+  // CHECK: %[[masked_read:.*]] = vector.mask %[[mask]] {
+  // CHECK-SAME: vector.transfer_read %{{.*}}[%[[c0_1]], %[[c0_1]]], %[[c42]]
+  // CHECK-SAME: {in_bounds = [true, true]} : tensor<?x?xf32>, vector<1x4xf32>
+  // CHECK-SAME: } : vector<1x4xi1> -> vector<1x4xf32>
+  // CHECK-DAG: %[[empty:.*]] = tensor.empty() : tensor<1x4xf32>
+  // CHECK-DAG: %[[c0_2:.*]] = arith.constant 0 : index
+  // CHECK: %[[masked_write:.*]] = vector.transfer_write %[[masked_read]], %[[empty]][%[[c0_2]], %[[c0_2]]]
+  // CHECK-SAME: {in_bounds = [true, true]} : vector<1x4xf32>, tensor<1x4xf32>
+  // CHECK: return %[[masked_write]] : tensor<1x4xf32>
+  %cst = arith.constant 42.43 : f32
+  %c0 = arith.constant 0 : index
+  %1 = tensor.pad %0 low[%l0, %c0] high[%h0, %h1] {
+    ^bb0(%hh1: index, %hh2: index):
+      tensor.yield %cst : f32
+  } : tensor<?x?xf32> to tensor<1x4xf32>
+  return %1: tensor<1x4xf32>
+}
+
+module attributes {transform.with_named_sequence} {
+  transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) {
+    %0 = transform.structured.match ops{["tensor.pad"]} in %arg1
+      : (!transform.any_op) -> !transform.any_op
+    transform.structured.vectorize %0 vector_sizes [1, 4] : !transform.any_op
+    transform.yield
+  }
+}
+
+// -----
+
 // Input identical as the test in vectorization-with-patterns.mlir. Output is
 // different - vector sizes are inferred (rather than user-specified) and hence
 // masking was used.

From 81a9fc9ab546b9495b823390a19f15a88d90e6db Mon Sep 17 00:00:00 2001
From: Nirvedh
Date: Tue, 1 Apr 2025 11:40:22 -0500
Subject: [PATCH 2/4] address reviewer comments

Signed-off-by: Nirvedh
---
 .../Linalg/Transforms/Vectorization.cpp      | 11 ++++++++
 .../Linalg/vectorization-unsupported.mlir    |  3 ++
 mlir/test/Dialect/Linalg/vectorization.mlir  | 28 ++++++++++---------
 3 files changed, 29 insertions(+), 13 deletions(-)

diff --git a/mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp b/mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp
index 4adacbe7d45ca..dcab00f5603e3 100644
--- a/mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp
+++ b/mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp
@@ -2178,6 +2178,17 @@ vectorizePadOpPrecondition(tensor::PadOp padOp,
                                       inputVectorSizes)))
     return failure();
 
+  // Low padding is currently unsupported for the general case as this
+  // would require shifting the results to the right for the low padded dims by
+  // the required amount of low padding. However, we do support low padding if
+  // the dims being low padded have result sizes of 1. The reason is when we
+  // have a low pad on a unit result dim, the input size of that dimension will
+  // be dynamically zero (as the sum of the low pad and input dim size has to be
+  // one) and hence we will create a zero mask as the lowering logic just makes
+  // the mask one for the input dim size - which is zero here. Hence we will
+  // load the pad value which is what we want in this case. If the low pad is
+  // dynamically zero then the lowering is correct as well as no shifts are
+  // necessary.
   if (llvm::any_of(llvm::enumerate(padOp.getLow()), [&](const auto &en) {
         Value padValue = en.value();
         unsigned pos = en.index();
diff --git a/mlir/test/Dialect/Linalg/vectorization-unsupported.mlir b/mlir/test/Dialect/Linalg/vectorization-unsupported.mlir
index f419d81d8df2b..0b9a867436856 100644
--- a/mlir/test/Dialect/Linalg/vectorization-unsupported.mlir
+++ b/mlir/test/Dialect/Linalg/vectorization-unsupported.mlir
@@ -305,6 +305,9 @@ module attributes {transform.with_named_sequence} {
 
 // -----
 
+// Low padding is unsupported with the exception of the special case if the
+// result size of the padded dimension is unit. Here `%l0` being a non-zero
+// padding applied to a non-unit dimenension makes this case unsupported.
 func.func @test_masked_vectorize_lowpad(
   %0 : tensor<?x?xf32>, %h0 : index, %h1 : index, %l0 : index)
   -> tensor<2x4xf32> {
diff --git a/mlir/test/Dialect/Linalg/vectorization.mlir b/mlir/test/Dialect/Linalg/vectorization.mlir
index efd752e70df03..fdcbb01801a64 100644
--- a/mlir/test/Dialect/Linalg/vectorization.mlir
+++ b/mlir/test/Dialect/Linalg/vectorization.mlir
@@ -665,27 +665,29 @@ module attributes {transform.with_named_sequence} {
 }
 
 // -----
-
+// This case is supported because low padding `%l0` is applied on
+// a unit dimentsion which is supported, non unit result dimension low
+// padding is currently unsupported.
 // CHECK-LABEL: func @test_masked_vectorize_unit_lowpad
 func.func @test_masked_vectorize_unit_lowpad(
   %0 : tensor<?x?xf32>, %h0 : index, %h1 : index, %l0 : index)
   -> tensor<1x4xf32>
 {
-  // CHECK-DAG: %[[c42:.*]] = arith.constant 4.243000e+01 : f32
-  // CHECK-DAG: %[[c0:.*]] = arith.constant 0 : index
-  // CHECK: %[[c0_1:.*]] = arith.constant 0 : index
-  // CHECK-DAG: %[[d0:.*]] = tensor.dim {{.*}} : tensor<?x?xf32>
-  // CHECK-DAG: %[[d1:.*]] = tensor.dim {{.*}} : tensor<?x?xf32>
-  // CHECK: %[[mask:.*]] = vector.create_mask %[[d0]], %[[d1]] : vector<1x4xi1>
-  // CHECK: %[[masked_read:.*]] = vector.mask %[[mask]] {
-  // CHECK-SAME: vector.transfer_read %{{.*}}[%[[c0_1]], %[[c0_1]]], %[[c42]]
+  // CHECK-DAG: %[[C42:.*]] = arith.constant 4.243000e+01 : f32
+  // CHECK-DAG: %[[C0:.*]] = arith.constant 0 : index
+  // CHECK: %[[C0_1:.*]] = arith.constant 0 : index
+  // CHECK-DAG: %[[D0:.*]] = tensor.dim {{.*}} : tensor<?x?xf32>
+  // CHECK-DAG: %[[D1:.*]] = tensor.dim {{.*}} : tensor<?x?xf32>
+  // CHECK: %[[MASK:.*]] = vector.create_mask %[[D0]], %[[D1]] : vector<1x4xi1>
+  // CHECK: %[[MASKED_READ:.*]] = vector.mask %[[MASK]] {
+  // CHECK-SAME: vector.transfer_read %{{.*}}[%[[C0_1]], %[[C0_1]]], %[[C42]]
   // CHECK-SAME: {in_bounds = [true, true]} : tensor<?x?xf32>, vector<1x4xf32>
   // CHECK-SAME: } : vector<1x4xi1> -> vector<1x4xf32>
-  // CHECK-DAG: %[[empty:.*]] = tensor.empty() : tensor<1x4xf32>
-  // CHECK-DAG: %[[c0_2:.*]] = arith.constant 0 : index
-  // CHECK: %[[masked_write:.*]] = vector.transfer_write %[[masked_read]], %[[empty]][%[[c0_2]], %[[c0_2]]]
+  // CHECK-DAG: %[[EMPTY:.*]] = tensor.empty() : tensor<1x4xf32>
+  // CHECK-DAG: %[[C0_2:.*]] = arith.constant 0 : index
+  // CHECK: %[[MASKED_WRITE:.*]] = vector.transfer_write %[[MASKED_READ]], %[[EMPTY]][%[[C0_2]], %[[C0_2]]]
   // CHECK-SAME: {in_bounds = [true, true]} : vector<1x4xf32>, tensor<1x4xf32>
-  // CHECK: return %[[masked_write]] : tensor<1x4xf32>
+  // CHECK: return %[[MASKED_WRITE]] : tensor<1x4xf32>
   %cst = arith.constant 42.43 : f32
   %c0 = arith.constant 0 : index
   %1 = tensor.pad %0 low[%l0, %c0] high[%h0, %h1] {

From 1465643dfc2c8f66a9845e76ad06828ff03e7209 Mon Sep 17 00:00:00 2001
From: Nirvedh
Date: Tue, 1 Apr 2025 11:48:23 -0500
Subject: [PATCH 3/4] typos

---
 mlir/test/Dialect/Linalg/vectorization-unsupported.mlir | 2 +-
 mlir/test/Dialect/Linalg/vectorization.mlir             | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/mlir/test/Dialect/Linalg/vectorization-unsupported.mlir b/mlir/test/Dialect/Linalg/vectorization-unsupported.mlir
index 0b9a867436856..9410d8b556ca5 100644
--- a/mlir/test/Dialect/Linalg/vectorization-unsupported.mlir
+++ b/mlir/test/Dialect/Linalg/vectorization-unsupported.mlir
@@ -307,7 +307,7 @@ module attributes {transform.with_named_sequence} {
 
 // Low padding is unsupported with the exception of the special case if the
 // result size of the padded dimension is unit. Here `%l0` being a non-zero
-// padding applied to a non-unit dimenension makes this case unsupported.
+// padding applied to a non-unit result dimension makes this case unsupported.
 func.func @test_masked_vectorize_lowpad(
   %0 : tensor<?x?xf32>, %h0 : index, %h1 : index, %l0 : index)
   -> tensor<2x4xf32> {
diff --git a/mlir/test/Dialect/Linalg/vectorization.mlir b/mlir/test/Dialect/Linalg/vectorization.mlir
index fdcbb01801a64..79b4d13ef16a8 100644
--- a/mlir/test/Dialect/Linalg/vectorization.mlir
+++ b/mlir/test/Dialect/Linalg/vectorization.mlir
@@ -666,7 +666,7 @@ module attributes {transform.with_named_sequence} {
 
 // -----
 // This case is supported because low padding `%l0` is applied on
-// a unit dimentsion which is supported, non unit result dimension low
+// a unit dimension which is supported, non unit result dimension low
 // padding is currently unsupported.
 // CHECK-LABEL: func @test_masked_vectorize_unit_lowpad
 func.func @test_masked_vectorize_unit_lowpad(

From 20551ddb48928ee927c623985c62d53f4f12d4e8 Mon Sep 17 00:00:00 2001
From: Nirvedh
Date: Wed, 2 Apr 2025 11:32:36 -0500
Subject: [PATCH 4/4] reviewer comments 2

Signed-off-by: Nirvedh
---
 .../Linalg/Transforms/Vectorization.cpp      | 26 +++++++++----------
 .../Linalg/vectorization-unsupported.mlir    |  8 +++---
 mlir/test/Dialect/Linalg/vectorization.mlir  |  4 +--
 3 files changed, 19 insertions(+), 19 deletions(-)

diff --git a/mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp b/mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp
index dcab00f5603e3..8c8b1b85ef5a3 100644
--- a/mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp
+++ b/mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp
@@ -2178,22 +2178,22 @@ vectorizePadOpPrecondition(tensor::PadOp padOp,
                                       inputVectorSizes)))
     return failure();
 
-  // Low padding is currently unsupported for the general case as this
-  // would require shifting the results to the right for the low padded dims by
-  // the required amount of low padding. However, we do support low padding if
-  // the dims being low padded have result sizes of 1. The reason is when we
-  // have a low pad on a unit result dim, the input size of that dimension will
-  // be dynamically zero (as the sum of the low pad and input dim size has to be
-  // one) and hence we will create a zero mask as the lowering logic just makes
-  // the mask one for the input dim size - which is zero here. Hence we will
-  // load the pad value which is what we want in this case. If the low pad is
-  // dynamically zero then the lowering is correct as well as no shifts are
-  // necessary.
+  // Padding with non-zero low pad values is not supported, unless the
+  // corresponding result dim is 1 as this would require shifting the results to
+  // the right for the low padded dims by the required amount of low padding.
+  // However, we do support low padding if the dims being low padded have result
+  // sizes of 1. The reason is when we have a low pad on a unit result dim, the
+  // input size of that dimension will be dynamically zero (as the sum of the
+  // low pad and input dim size has to be one) and hence we will create a zero
+  // mask as the lowering logic just makes the mask one for the input dim size -
+  // which is zero here. Hence we will load the pad value which is what we want
+  // in this case. If the low pad is dynamically zero then the lowering is
+  // correct as well as no shifts are necessary.
   if (llvm::any_of(llvm::enumerate(padOp.getLow()), [&](const auto &en) {
         Value padValue = en.value();
         unsigned pos = en.index();
-        std::optional<int64_t> res = getConstantIntValue(padValue);
-        return (!res.has_value() || res.value() != 0) &&
+        std::optional<int64_t> pad = getConstantIntValue(padValue);
+        return (!pad.has_value() || pad.value() != 0) &&
                resultTensorShape[pos] != 1;
       })) {
     LDBG("low pad must all be zero for all non unit dims: " << padOp << "\n");
     return failure();
   }
diff --git a/mlir/test/Dialect/Linalg/vectorization-unsupported.mlir b/mlir/test/Dialect/Linalg/vectorization-unsupported.mlir
index 9410d8b556ca5..f653a4852b074 100644
--- a/mlir/test/Dialect/Linalg/vectorization-unsupported.mlir
+++ b/mlir/test/Dialect/Linalg/vectorization-unsupported.mlir
@@ -305,10 +305,10 @@ module attributes {transform.with_named_sequence} {
 
 // -----
 
-// Low padding is unsupported with the exception of the special case if the
-// result size of the padded dimension is unit. Here `%l0` being a non-zero
-// padding applied to a non-unit result dimension makes this case unsupported.
-func.func @test_masked_vectorize_lowpad(
+// Padding with non-zero low pad values is not supported, unless the corresponding
+// result dim is 1. Here `%l0` being a non-zero low pad applied to a
+// non-unit result dimension makes this case unsupported.
+func.func @tensor_pad_non_zero_low_pad(
   %0 : tensor<?x?xf32>, %h0 : index, %h1 : index, %l0 : index)
   -> tensor<2x4xf32> {
   // expected-error @+3 {{Attempted to vectorize, but failed}}
diff --git a/mlir/test/Dialect/Linalg/vectorization.mlir b/mlir/test/Dialect/Linalg/vectorization.mlir
index 79b4d13ef16a8..299be1296aa66 100644
--- a/mlir/test/Dialect/Linalg/vectorization.mlir
+++ b/mlir/test/Dialect/Linalg/vectorization.mlir
@@ -668,8 +668,8 @@ module attributes {transform.with_named_sequence} {
 // This case is supported because low padding `%l0` is applied on
 // a unit dimension which is supported, non unit result dimension low
 // padding is currently unsupported.
-// CHECK-LABEL: func @test_masked_vectorize_unit_lowpad
-func.func @test_masked_vectorize_unit_lowpad(
+// CHECK-LABEL: func @test_masked_vectorize_non_zero_low_pad_unit_res_dim
+func.func @test_masked_vectorize_non_zero_low_pad_unit_res_dim(
   %0 : tensor<?x?xf32>, %h0 : index, %h1 : index, %l0 : index)
   -> tensor<1x4xf32>
 {
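Below is a minimal standalone MLIR sketch, not part of the patch series, of why the special case accepted by the new precondition is safe. The function name and the static 0-extent source shape are hypothetical, chosen only to make the shapes concrete: with a low pad of 1 on a result dim of size 1, the source extent along that dim is forced to 0 (low + source + high = 1), so the mask the vectorizer builds from the source dims is all-false along that dim and the masked transfer_read yields only the pad value, which is exactly the padded result.

func.func @unit_dim_low_pad(%src : tensor<0x4xf32>) -> tensor<1x4xf32> {
  // The pad value. With a source extent of 0 along dim 0, a mask built from
  // the source dims is all-false, so a masked vector.transfer_read of %src
  // returns this value in every lane.
  %pad = arith.constant 42.43 : f32
  // Result dim 0 = low (1) + source (0) + high (0) = 1: a non-zero low pad
  // on a unit result dim, i.e. the case the precondition now allows.
  %res = tensor.pad %src low[1, 0] high[0, 0] {
  ^bb0(%i : index, %j : index):
    tensor.yield %pad : f32
  } : tensor<0x4xf32> to tensor<1x4xf32>
  return %res : tensor<1x4xf32>
}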