From 654a40d3603e5f5e8590e727eca64a42b8cfe3ed Mon Sep 17 00:00:00 2001
From: Matthias Springer <mspringer@nvidia.com>
Date: Sun, 9 Mar 2025 13:52:49 +0100
Subject: [PATCH 1/2] [mlir][Tensor] Check for out-of-bounds extraction in
 `extract_slice` verifier

---
 mlir/lib/Dialect/Tensor/IR/TensorOps.cpp      | 57 +++++++++++++++++--
 .../Tensor/bubble-up-extract-slice-op.mlir    |  2 +-
 ...redundant-insert-slice-rank-expansion.mlir |  8 +--
 .../Tensor/fold-tensor-subset-ops.mlir        | 16 ++----
 mlir/test/Dialect/Tensor/invalid.mlir         | 32 +++++++++++
 5 files changed, 96 insertions(+), 19 deletions(-)

diff --git a/mlir/lib/Dialect/Tensor/IR/TensorOps.cpp b/mlir/lib/Dialect/Tensor/IR/TensorOps.cpp
index e3c8899cd44d9..79132d4ed2ef0 100644
--- a/mlir/lib/Dialect/Tensor/IR/TensorOps.cpp
+++ b/mlir/lib/Dialect/Tensor/IR/TensorOps.cpp
@@ -2352,13 +2352,48 @@ static LogicalResult produceSliceErrorMsg(SliceVerificationResult result,
   }
 }
 
+/// Verify that the offsets/sizes/strides-style access into the given tensor
+/// is in-bounds. Only static information is verified.
+static LogicalResult verifyInBoundsSlice(Operation *op,
+                                         RankedTensorType tensorType,
+                                         ArrayRef<int64_t> staticOffsets,
+                                         ArrayRef<int64_t> staticSizes,
+                                         ArrayRef<int64_t> staticStrides) {
+  for (int64_t i = 0, e = tensorType.getRank(); i < e; ++i) {
+    // Nothing to verify for dynamic source dims.
+    if (tensorType.isDynamicDim(i))
+      continue;
+    // Nothing to verify if the offset is dynamic.
+    if (ShapedType::isDynamic(staticOffsets[i]))
+      continue;
+    if (staticOffsets[i] >= tensorType.getDimSize(i))
+      return op->emitOpError("offset ") << i << " is out-of-bounds";
+    if (ShapedType::isDynamic(staticSizes[i]) ||
+        ShapedType::isDynamic(staticStrides[i]))
+      continue;
+    if (staticOffsets[i] + (staticSizes[i] - 1) * staticStrides[i] >=
+        tensorType.getDimSize(i))
+      return op->emitOpError("slice along dimension ")
+             << i << " runs out-of-bounds";
+  }
+  return success();
+}
+
 /// Verifier for ExtractSliceOp.
 LogicalResult ExtractSliceOp::verify() {
+  RankedTensorType sourceType = getSourceType();
+
   // Verify result type against inferred type.
   RankedTensorType expectedType = ExtractSliceOp::inferResultType(
-      getSourceType(), getMixedOffsets(), getMixedSizes(), getMixedStrides());
+      sourceType, getMixedOffsets(), getMixedSizes(), getMixedStrides());
   SliceVerificationResult result = isRankReducedType(expectedType, getType());
-  return produceSliceErrorMsg(result, *this, expectedType);
+  if (result != SliceVerificationResult::Success)
+    return produceSliceErrorMsg(result, *this, expectedType);
+
+  // Verify that offsets, sizes, strides do not run out-of-bounds with respect
+  // to the source tensor.
+  return verifyInBoundsSlice(getOperation(), sourceType, getStaticOffsets(),
+                             getStaticSizes(), getStaticStrides());
 }
 
 llvm::SmallBitVector ExtractSliceOp::getDroppedDims() {
@@ -2729,11 +2764,18 @@ static SliceVerificationResult verifyInsertSliceOp(
 
 /// Verifier for InsertSliceOp.
 LogicalResult InsertSliceOp::verify() {
+  // Verify result type against inferred type.
   RankedTensorType expectedType;
   SliceVerificationResult result =
       verifyInsertSliceOp(getSourceType(), getType(), getStaticOffsets(),
                           getStaticSizes(), getStaticStrides(), &expectedType);
-  return produceSliceErrorMsg(result, *this, expectedType);
+  if (result != SliceVerificationResult::Success)
+    return produceSliceErrorMsg(result, *this, expectedType);
+
+  // Verify that offsets, sizes, strides do not run out-of-bounds with respect
+  // to the source tensor.
+  return verifyInBoundsSlice(getOperation(), getDestType(), getStaticOffsets(),
+                             getStaticSizes(), getStaticStrides());
 }
 
 /// If we have two consecutive InsertSliceOp writing to the same slice, we
@@ -3747,11 +3789,18 @@ LogicalResult ParallelInsertSliceOp::verify() {
     return this->emitError("expected ParallelCombiningOpInterface parent, got:")
            << *(getOperation()->getParentOp());
 
+  // Verify result type against inferred type.
   RankedTensorType expectedType;
   SliceVerificationResult result =
       verifyInsertSliceOp(getSourceType(), getDestType(), getStaticOffsets(),
                           getStaticSizes(), getStaticStrides(), &expectedType);
-  return produceSliceErrorMsg(result, *this, expectedType);
+  if (result != SliceVerificationResult::Success)
+    return produceSliceErrorMsg(result, *this, expectedType);
+
+  // Verify that offsets, sizes, strides do not run out-of-bounds with respect
+  // to the source tensor.
+  return verifyInBoundsSlice(getOperation(), getDestType(), getStaticOffsets(),
+                             getStaticSizes(), getStaticStrides());
 }
 
 void ParallelInsertSliceOp::getCanonicalizationPatterns(
diff --git a/mlir/test/Dialect/Tensor/bubble-up-extract-slice-op.mlir b/mlir/test/Dialect/Tensor/bubble-up-extract-slice-op.mlir
index 252e7494bff79..3900bc56f433d 100644
--- a/mlir/test/Dialect/Tensor/bubble-up-extract-slice-op.mlir
+++ b/mlir/test/Dialect/Tensor/bubble-up-extract-slice-op.mlir
@@ -31,7 +31,7 @@ func.func @no_bubble_up_extract_slice_on_non_contiguous(%src: tensor<60xf32>) ->
 
 func.func @no_bubble_up_extract_slice_on_stride(%src: tensor<60xf32>) -> tensor<1x1x5xf32> {
   %expand = tensor.expand_shape %src [[0, 1, 2]] output_shape [2, 3, 10] : tensor<60xf32> into tensor<2x3x10xf32>
-  %extract = tensor.extract_slice %expand[0, 0, 5][1, 1, 5][1, 1, 2] : tensor<2x3x10xf32> to tensor<1x1x5xf32>
+  %extract = tensor.extract_slice %expand[0, 0, 0][1, 1, 5][1, 1, 2] : tensor<2x3x10xf32> to tensor<1x1x5xf32>
   return %extract : tensor<1x1x5xf32>
 }
 
diff --git a/mlir/test/Dialect/Tensor/drop-redundant-insert-slice-rank-expansion.mlir b/mlir/test/Dialect/Tensor/drop-redundant-insert-slice-rank-expansion.mlir
index 88e55062f4770..0496b93257a9e 100644
--- a/mlir/test/Dialect/Tensor/drop-redundant-insert-slice-rank-expansion.mlir
+++ b/mlir/test/Dialect/Tensor/drop-redundant-insert-slice-rank-expansion.mlir
@@ -51,16 +51,16 @@ func.func @no_fold_more_unit_dims_insert_slice_of_extract_slice(%in : tensor<?x8
 
 // -----
 
-func.func @no_fold_strided_insert_slice_of_extract_slice(%in : tensor<?x8x2x8xf32>, %dest : tensor<1x4x4xf32>) -> tensor<1x4x4xf32> {
+func.func @no_fold_strided_insert_slice_of_extract_slice(%in : tensor<?x8x2x8xf32>, %dest : tensor<1x15x15xf32>) -> tensor<1x15x15xf32> {
   %extracted_slice = tensor.extract_slice %in[0, 0, 0, 0] [1, 8, 1, 8] [1, 1, 1, 1] : tensor<?x8x2x8xf32> to tensor<8x8xf32>
-  %inserted_slice = tensor.insert_slice %extracted_slice into %dest[0, 0, 0] [1, 8, 8] [1, 2, 2] : tensor<8x8xf32> into tensor<1x4x4xf32>
-  return %inserted_slice : tensor<1x4x4xf32>
+  %inserted_slice = tensor.insert_slice %extracted_slice into %dest[0, 0, 0] [1, 8, 8] [1, 2, 2] : tensor<8x8xf32> into tensor<1x15x15xf32>
+  return %inserted_slice : tensor<1x15x15xf32>
 }
 // CHECK-LABEL: func.func @no_fold_strided_insert_slice_of_extract_slice(
 // CHECK-SAME:      %[[ARG0:.*]]: tensor<?x8x2x8xf32>
 // CHECK:         %[[EXTRACTED_SLICE:.*]] = tensor.extract_slice %[[ARG0]]
 // CHECK:         %[[INSERTED_SLICE:.*]] = tensor.insert_slice %[[EXTRACTED_SLICE]]
-// CHECK:         return %[[INSERTED_SLICE]] : tensor<1x4x4xf32>
+// CHECK:         return %[[INSERTED_SLICE]] : tensor<1x15x15xf32>
 
 // -----
 
diff --git a/mlir/test/Dialect/Tensor/fold-tensor-subset-ops.mlir b/mlir/test/Dialect/Tensor/fold-tensor-subset-ops.mlir
index 988b5d835c16e..cf8711eb64ab9 100644
--- a/mlir/test/Dialect/Tensor/fold-tensor-subset-ops.mlir
+++ b/mlir/test/Dialect/Tensor/fold-tensor-subset-ops.mlir
@@ -271,38 +271,34 @@ func.func @insert_slice_of_transfer_write_rank_extending(%t1 : tensor<?x?x12xf32
 
 // -----
 
-//       CHECK: #[[$map:.*]] = affine_map<()[s0] -> (s0 + 2)>
 // CHECK-LABEL: func @insert_slice_of_insert_slice(
 //  CHECK-SAME:     %[[t:[0-9a-z]*]]: tensor<f32>
 //  CHECK-SAME:     %[[r1:[0-9a-z]*]]: tensor<1x14xf32>
 //  CHECK-SAME:     %[[pos:[0-9a-z]*]]: index
-//       CHECK:   %[[add:.*]] = affine.apply #[[$map]]()[%[[pos]]]
-//       CHECK:   tensor.insert_slice %[[t]] into %[[r1]][4, %[[add]]] [1, 1] [1, 1] : tensor<f32> into tensor<1x14xf32>
+//       CHECK:   tensor.insert_slice %[[t]] into %[[r1]][0, %[[pos]]] [1, 1] [1, 1] : tensor<f32> into tensor<1x14xf32>
 func.func @insert_slice_of_insert_slice(%t: tensor<f32>, %r0: tensor<1x1xf32>, %r1: tensor<1x14xf32>, %pos: index)
     -> tensor<1x14xf32> 
 {
-  %0 = tensor.insert_slice %t into %r0[1, 2] [1, 1] [1, 1] 
+  %0 = tensor.insert_slice %t into %r0[0, 0] [1, 1] [1, 1] 
     : tensor<f32> into tensor<1x1xf32>
-  %1 = tensor.insert_slice %0 into %r1[3, %pos] [1, 1] [1, 1] 
+  %1 = tensor.insert_slice %0 into %r1[0, %pos] [1, 1] [1, 1] 
     : tensor<1x1xf32> into tensor<1x14xf32>
   return %1 : tensor<1x14xf32>
 }
 
 // -----
 
-//   CHECK-DAG: #[[$map:.*]] = affine_map<()[s0] -> (s0 + 2)>
 // CHECK-LABEL: func @insert_slice_of_insert_slice(
 //  CHECK-SAME:     %[[t:[0-9a-z]*]]: tensor<f32>
 //  CHECK-SAME:     %[[r1:[0-9a-z]*]]: tensor<1x14xf32>
 //  CHECK-SAME:     %[[pos:[0-9a-z]*]]: index
-//       CHECK:   %[[composed_pos:.+]] = affine.apply #[[$map]]()[%[[pos]]]
-//       CHECK:   tensor.insert_slice %[[t]] into %[[r1]][3, %[[composed_pos]]] [1, 1] [1, 1] : tensor<f32> into tensor<1x14xf32>
+//       CHECK:   tensor.insert_slice %[[t]] into %[[r1]][0, %[[pos]]] [1, 1] [1, 1] : tensor<f32> into tensor<1x14xf32>
 func.func @insert_slice_of_insert_slice(%t: tensor<f32>, %r0: tensor<1xf32>, %r1: tensor<1x14xf32>, %pos: index)
     -> tensor<1x14xf32> 
 {
-  %0 = tensor.insert_slice %t into %r0[2] [1] [1] 
+  %0 = tensor.insert_slice %t into %r0[0] [1] [1] 
     : tensor<f32> into tensor<1xf32>
-  %1 = tensor.insert_slice %0 into %r1[3, %pos] [1, 1] [1, 1] 
+  %1 = tensor.insert_slice %0 into %r1[0, %pos] [1, 1] [1, 1] 
     : tensor<1xf32> into tensor<1x14xf32>
   return %1 : tensor<1x14xf32>
 }
diff --git a/mlir/test/Dialect/Tensor/invalid.mlir b/mlir/test/Dialect/Tensor/invalid.mlir
index 654169841c1c1..de17a3197623a 100644
--- a/mlir/test/Dialect/Tensor/invalid.mlir
+++ b/mlir/test/Dialect/Tensor/invalid.mlir
@@ -258,6 +258,22 @@ func.func @illegal_num_offsets(%arg0 : tensor<?x?x?xf32>, %arg1 : index, %arg2 :
 
 // -----
 
+func.func @extract_slice_offset_out_of_bounds(%arg0: tensor<10xf32>) {
+  // expected-error@+1 {{offset 0 is out-of-bounds}}
+  %0 = tensor.extract_slice %arg0 [10][1][1] : tensor<10xf32> to tensor<1xf32>
+  return
+}
+
+// -----
+
+func.func @extract_slice_runs_out_of_bounds(%arg0: tensor<9xf32>) {
+  // expected-error@+1 {{slice along dimension 0 runs out-of-bounds}}
+  %0 = tensor.extract_slice %arg0 [3][4][2] : tensor<9xf32> to tensor<4xf32>
+  return
+}
+
+// -----
+
 func.func @insert_slice_wrong_result_rank(%t1: tensor<?xf32>, %t2: tensor<?x?xf32>, %idx : index) {
   // expected-error @+1 {{expected rank to be smaller or equal to the other rank.}}
   %0 = tensor.insert_slice %t2 into %t1[0][4][1] : tensor<?x?xf32> into tensor<?xf32>
@@ -296,6 +312,22 @@ func.func @insert_slice_wrong_dynamic_type(%t1: tensor<?x4x4xf32>, %t2: tensor<8
 
 // -----
 
+func.func @insert_slice_offset_out_of_bounds(%arg0: tensor<1xf32>, %arg1: tensor<10xf32>) {
+  // expected-error@+1 {{offset 0 is out-of-bounds}}
+  %0 = tensor.insert_slice %arg0 into %arg1 [10][1][1] : tensor<1xf32> into tensor<10xf32>
+  return
+}
+
+// -----
+
+func.func @insert_slice_runs_out_of_bounds(%arg0: tensor<4xf32>, %arg1: tensor<9xf32>) {
+  // expected-error@+1 {{slice along dimension 0 runs out-of-bounds}}
+  %0 = tensor.insert_slice %arg0 into %arg1 [3][4][2] : tensor<4xf32> into tensor<9xf32>
+  return
+}
+
+// -----
+
 func.func @illegal_expanding_reshape_static_tensor
     (%arg0: tensor<2x3x20xf32>) -> tensor<2x3x2x4x5xf32> {
   // expected-error @+1 {{expected dimension 2 of collapsed type to be static value of 40}}

From 8690690b35bcd5cb82b3b5b316d544335061426c Mon Sep 17 00:00:00 2001
From: Matthias Springer <mspringer@nvidia.com>
Date: Tue, 11 Mar 2025 10:42:23 +0100
Subject: [PATCH 2/2] address comments

---
 mlir/lib/Dialect/Tensor/IR/TensorOps.cpp | 12 ++++++++----
 mlir/test/Dialect/Tensor/invalid.mlir    |  8 ++++----
 2 files changed, 12 insertions(+), 8 deletions(-)

diff --git a/mlir/lib/Dialect/Tensor/IR/TensorOps.cpp b/mlir/lib/Dialect/Tensor/IR/TensorOps.cpp
index 79132d4ed2ef0..e5a32c0bbd4e0 100644
--- a/mlir/lib/Dialect/Tensor/IR/TensorOps.cpp
+++ b/mlir/lib/Dialect/Tensor/IR/TensorOps.cpp
@@ -2367,14 +2367,18 @@ static LogicalResult verifyInBoundsSlice(Operation *op,
     if (ShapedType::isDynamic(staticOffsets[i]))
       continue;
     if (staticOffsets[i] >= tensorType.getDimSize(i))
-      return op->emitOpError("offset ") << i << " is out-of-bounds";
+      return op->emitOpError("offset ")
+             << i << " is out-of-bounds: " << staticOffsets[i]
+             << " >= " << tensorType.getDimSize(i);
     if (ShapedType::isDynamic(staticSizes[i]) ||
         ShapedType::isDynamic(staticStrides[i]))
       continue;
-    if (staticOffsets[i] + (staticSizes[i] - 1) * staticStrides[i] >=
-        tensorType.getDimSize(i))
+    int64_t lastPos =
+        staticOffsets[i] + (staticSizes[i] - 1) * staticStrides[i];
+    if (lastPos >= tensorType.getDimSize(i))
       return op->emitOpError("slice along dimension ")
-             << i << " runs out-of-bounds";
+             << i << " runs out-of-bounds: " << lastPos
+             << " >= " << tensorType.getDimSize(i);
   }
   return success();
 }
diff --git a/mlir/test/Dialect/Tensor/invalid.mlir b/mlir/test/Dialect/Tensor/invalid.mlir
index de17a3197623a..eeb3967337806 100644
--- a/mlir/test/Dialect/Tensor/invalid.mlir
+++ b/mlir/test/Dialect/Tensor/invalid.mlir
@@ -259,7 +259,7 @@ func.func @illegal_num_offsets(%arg0 : tensor<?x?x?xf32>, %arg1 : index, %arg2 :
 // -----
 
 func.func @extract_slice_offset_out_of_bounds(%arg0: tensor<10xf32>) {
-  // expected-error@+1 {{offset 0 is out-of-bounds}}
+  // expected-error@+1 {{offset 0 is out-of-bounds: 10 >= 10}}
   %0 = tensor.extract_slice %arg0 [10][1][1] : tensor<10xf32> to tensor<1xf32>
   return
 }
@@ -267,7 +267,7 @@ func.func @extract_slice_offset_out_of_bounds(%arg0: tensor<10xf32>) {
 // -----
 
 func.func @extract_slice_runs_out_of_bounds(%arg0: tensor<9xf32>) {
-  // expected-error@+1 {{slice along dimension 0 runs out-of-bounds}}
+  // expected-error@+1 {{slice along dimension 0 runs out-of-bounds: 9 >= 9}}
   %0 = tensor.extract_slice %arg0 [3][4][2] : tensor<9xf32> to tensor<4xf32>
   return
 }
@@ -313,7 +313,7 @@ func.func @insert_slice_wrong_dynamic_type(%t1: tensor<?x4x4xf32>, %t2: tensor<8
 // -----
 
 func.func @insert_slice_offset_out_of_bounds(%arg0: tensor<1xf32>, %arg1: tensor<10xf32>) {
-  // expected-error@+1 {{offset 0 is out-of-bounds}}
+  // expected-error@+1 {{offset 0 is out-of-bounds: 10 >= 10}}
   %0 = tensor.insert_slice %arg0 into %arg1 [10][1][1] : tensor<1xf32> into tensor<10xf32>
   return
 }
@@ -321,7 +321,7 @@ func.func @insert_slice_offset_out_of_bounds(%arg0: tensor<1xf32>, %arg1: tensor
 // -----
 
 func.func @insert_slice_runs_out_of_bounds(%arg0: tensor<4xf32>, %arg1: tensor<9xf32>) {
-  // expected-error@+1 {{slice along dimension 0 runs out-of-bounds}}
+  // expected-error@+1 {{slice along dimension 0 runs out-of-bounds: 9 >= 9}}
   %0 = tensor.insert_slice %arg0 into %arg1 [3][4][2] : tensor<4xf32> into tensor<9xf32>
   return
 }