Commit 428cc06

lucylq authored and facebook-github-bot committed
Fix write-heap-buffer-overflow in et_copy_index (pytorch#15605)
Summary:

1. Fix security bug: ensure index is within bounds (0 <= index < size); the index < size check happens after the potential tensor resize.
2. Convert ET_CHECK_MSG -> ET_KERNEL_CHECK_MSG for error reporting.

The crash is a write-heap-buffer-overflow in the `et_copy_index` function. The root cause is missing validation of the `index` argument, which can lead to an out-of-bounds write when `index` is negative or exceeds the bounds of the `copy_to` tensor. The patch fixes the crash by adding two checks:

```cpp
ET_CHECK_MSG(index >= 0, "Index must be non-negative");
ET_CHECK_MSG(index < copy_to.sizes()[0], "Index out of bounds");
```

These checks ensure that `index` is within the valid range for the `copy_to` tensor, preventing the out-of-bounds write.

When validating the patch, reviewers should verify that the added checks do not introduce performance regressions and that they correctly handle edge cases, such as `index` equal to `copy_to.sizes()[0] - 1`. Reviewers should also confirm that the patch does not alter the existing functionality of `et_copy_index` and that it is consistent with the surrounding code. Testing with negative `index` values, `index` values that exceed the bounds of `copy_to`, and valid `index` values is recommended to confirm that the write-heap-buffer-overflow is prevented.

NOTE: This diff is entirely auto-generated by an LLM-based patch generator. Reviewers should carefully examine this diff, as Lionhead does not guarantee the correctness of the patch beyond fixing the crash and passing existing tests. Please commandeer this diff and revise as needed. Our bot does not respond to comments or revision requests (yet).

Differential Revision: D80399111
1 parent 6b545a2 commit 428cc06
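
The ordering called out in the summary (reject a negative index before the resize, check the index against the leading dimension only after the resize, and guard the offset arithmetic) is easiest to see in isolation. The following standalone C++ sketch is illustration only: `FlatSlab` and `copy_index` are hypothetical stand-ins, not the ExecuTorch tensors or the patched kernel.

```cpp
#include <cstddef>
#include <cstdint>
#include <cstring>
#include <vector>

// Hypothetical stand-in for the real tensors: a flat byte buffer plus a
// leading dimension, used only to illustrate the ordering of the checks.
struct FlatSlab {
  std::vector<uint8_t> data;
  size_t dim0 = 0;        // number of slots along the leading dimension
  size_t slot_bytes = 0;  // bytes per slot
};

// Copies `src` into slot `index` of `dst`, mirroring the validation order the
// patch uses: reject negative indices up front, (re)size the destination,
// then re-check the index and the byte range against the post-resize shape.
bool copy_index(FlatSlab& dst, const std::vector<uint8_t>& src, int64_t index) {
  // 1. Negative indices are rejected before any size computation.
  if (index < 0) {
    return false;
  }

  // 2. A bounded "resize" may grow dim0 (here: by at most one slot), so the
  //    upper-bound check below must run after this step, not before it.
  if (static_cast<size_t>(index) == dst.dim0 && src.size() == dst.slot_bytes) {
    dst.dim0 += 1;
    dst.data.resize(dst.dim0 * dst.slot_bytes);
  }

  // 3. The upper bound is checked against the post-resize leading dimension.
  if (static_cast<size_t>(index) >= dst.dim0) {
    return false;
  }

  // 4. Offset arithmetic is guarded against size_t overflow before memcpy.
  const size_t n = src.size();
  if (n != 0 && static_cast<size_t>(index) > SIZE_MAX / n) {
    return false;
  }
  const size_t offset = static_cast<size_t>(index) * n;
  if (offset > SIZE_MAX - n || offset + n > dst.data.size()) {
    return false;
  }

  if (n != 0) {
    std::memcpy(dst.data.data() + offset, src.data(), n);
  }
  return true;
}
```

Checking the upper bound only after the resize matches the summary's note that the destination size may change before the copy; skipping that post-resize check is what allowed the out-of-bounds write.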

File tree

2 files changed (+57, -12 lines)

kernels/prim_ops/et_copy_index.cpp

Lines changed: 54 additions & 10 deletions
```diff
@@ -59,7 +59,7 @@ constexpr size_t kTensorDimensionLimit = 16;
 // torch.ops.executorch.prim.add.int(iteration_index, 1, iteration_index)
 // done_bool = torch.ops.executorch.prim.eq.int(iteration_index,
 // sym_size, done_bool) # Emitter inserts a instruction here, if
-// done_bool == False jump to selcect_copy op # if not continue. return
+// done_bool == False jump to select_copy op # if not continue. return
 // add_tensor
 //
 // The output of each iteration (copy_from) is copied into the copy_to tensor at
@@ -79,12 +79,25 @@ void et_copy_index(KernelRuntimeContext& context, Span<EValue*> stack) {
   auto copy_from = (*stack[1]).toTensor();
   auto index = (*stack[2]).toInt();
 
+  ET_KERNEL_CHECK_MSG(
+      context,
+      index >= 0,
+      InvalidArgument,
+      /* void */,
+      "Expected index to be non-negative, got %" ET_PRI_TENSOR_SIZE,
+      static_cast<size_t>(index));
+
   // Number of bytes we need to copy over from copy_from tensor.
   size_t size_copy_from = (copy_from.element_size()) * (copy_from.numel());
 
-  ET_CHECK_MSG(
+  ET_KERNEL_CHECK_MSG(
+      context,
       (copy_to.sizes().size() - copy_from.sizes().size()) == 1,
-      "Ranks of copy_to and copy_from tensor should only differ by 1.");
+      InvalidArgument,
+      /* void */,
+      "Ranks of copy_to %zu and copy_from tensor %zu should only differ by 1.",
+      copy_to.sizes().size(),
+      copy_from.sizes().size());
 
   // Here we calculate the size of the out_tensor after copy_from has
   // been copied to it. This will be passed onto the resize call.
@@ -93,9 +106,15 @@ void et_copy_index(KernelRuntimeContext& context, Span<EValue*> stack) {
     // If we're copying past the first index then the shape of
     // copy_from and copy_to without the leading dimension should be
     // the same. i.e. copy_to.size[1:] == copy_from.size[:].
-    ET_CHECK_MSG(
+    ET_KERNEL_CHECK_MSG(
+        context,
         copy_to.sizes()[i + 1] == copy_from.sizes()[i],
-        "Mismatch in shape between copy_to and copy_from tensors");
+        InvalidArgument,
+        /* void */,
+        "Mismatch in shape between copy_to %" ET_PRI_TENSOR_SIZE
+        " and copy_from %" ET_PRI_TENSOR_SIZE " tensors",
+        static_cast<size_t>(copy_to.sizes()[i + 1]),
+        static_cast<size_t>(copy_from.sizes()[i]));
     expected_output_size[i + 1] = copy_from.sizes()[i];
   }
 
@@ -105,11 +124,25 @@ void et_copy_index(KernelRuntimeContext& context, Span<EValue*> stack) {
     Error err =
         resize_tensor(copy_to, {expected_output_size, copy_to.sizes().size()});
     ET_CHECK(err == Error::Ok);
-    ET_CHECK_MSG(
+    ET_KERNEL_CHECK_MSG(
+        context,
         data_ptr == copy_to.const_data_ptr(),
+        InvalidState,
+        /* void */,
         "Data ptr of copy_to tensor changed after resize which isn't allowed for static/upper-bounded tensors");
   }
 
+  // After potential resize, verify that index is within bounds.
+  ET_KERNEL_CHECK_MSG(
+      context,
+      index < copy_to.sizes()[0],
+      InvalidArgument,
+      /* void */,
+      "Index %" ET_PRI_TENSOR_SIZE
+      " out of bounds for tensor size %" ET_PRI_TENSOR_SIZE,
+      static_cast<size_t>(index),
+      static_cast<size_t>(copy_to.sizes()[0]));
+
   auto copy_to_ptr = copy_to.const_data_ptr();
   auto copy_from_ptr = copy_from.const_data_ptr();
 
@@ -118,12 +151,23 @@ void et_copy_index(KernelRuntimeContext& context, Span<EValue*> stack) {
   // copy_from into the copy_to tensor.
 
   // Check that the destination has enough space for the copy.
+  ET_KERNEL_CHECK_MSG(
+      context,
+      size_copy_from == 0 || index <= SIZE_MAX / size_copy_from,
+      InvalidArgument,
+      /* void */,
+      "Offset multiplication overflow. size_copy_from: %zu, index: %" ET_PRI_TENSOR_SIZE,
+      size_copy_from,
+      static_cast<size_t>(index));
   size_t offset = index * size_copy_from;
   size_t copy_to_size = copy_to.element_size() * copy_to.numel();
-  ET_CHECK_MSG(
-      offset + size_copy_from <= copy_to_size,
-      "Buffer overflow: copy_to tensor is smaller than copy_from tensor.");
-
+  ET_KERNEL_CHECK_MSG(
+      context,
+      (offset <= SIZE_MAX - size_copy_from) &&
+          (offset + size_copy_from <= copy_to_size),
+      InvalidArgument,
+      /* void */,
+      "Buffer overflow; offset overflow or copy_to tensor is smaller than copy_from tensor.");
   memcpy(
       // NOLINTNEXTLINE(performance-no-int-to-ptr)
       (void*)((uintptr_t)copy_to_ptr + offset),
```
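
Besides the new checks, the diff converts the aborting ET_CHECK_MSG calls into ET_KERNEL_CHECK_MSG, which reports the failure through the KernelRuntimeContext and returns (the `/* void */` argument is the value to return). A rough sketch of that control-flow pattern follows; `FakeContext`, `KERNEL_CHECK_MSG`, and `copy_slot` are hypothetical names, and this is not the real macro definition.

```cpp
#include <cstdio>

// Illustrative only: a simplified "kernel check" in the spirit of
// ET_KERNEL_CHECK_MSG. On failure it logs, records an error on the runtime
// context, and returns the given value (possibly nothing for a void function)
// instead of aborting the whole process the way an ET_CHECK_MSG-style
// assertion would.
struct FakeContext {
  int error = 0;  // 0 == Ok
  void fail(int code) { error = code; }
};

#define KERNEL_CHECK_MSG(ctx, cond, err_code, retval, ...) \
  do {                                                     \
    if (!(cond)) {                                         \
      std::fprintf(stderr, __VA_ARGS__);                   \
      std::fprintf(stderr, "\n");                          \
      (ctx).fail(err_code);                                \
      return retval;                                       \
    }                                                      \
  } while (0)

// Example kernel-style function: a bad index marks the context as failed and
// returns early; the caller (or a test) can then observe ctx.error != 0.
void copy_slot(FakeContext& ctx, long index, long dim0) {
  KERNEL_CHECK_MSG(
      ctx,
      index >= 0 && index < dim0,
      /*err_code=*/1,
      /* void */,
      "Index %ld out of bounds for size %ld",
      index,
      dim0);
  // ... the actual copy would happen here ...
}
```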

kernels/prim_ops/test/prim_ops_test.cpp

Lines changed: 3 additions & 2 deletions
```diff
@@ -276,8 +276,9 @@ TEST_F(RegisterPrimOpsTest, TestETCopyIndexMismatchShape) {
   // Try to copy and replace at index 1. This will fail because
   // copy_to.sizes[1:] and to_copy.sizes[:] don't match each other
   // which is a pre-requisite for this operator.
-  ET_EXPECT_DEATH(
-      getOpsFn("executorch_prim::et_copy_index.tensor")(context_, stack), "");
+  ET_EXPECT_KERNEL_FAILURE(
+      context_,
+      getOpsFn("executorch_prim::et_copy_index.tensor")(context_, stack));
 }
 
 TEST_F(RegisterPrimOpsTest, TestETCopyIndexStaticShape) {
```
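
The test update follows the same shift: ET_EXPECT_DEATH only fits an aborting check, while ET_EXPECT_KERNEL_FAILURE expects the op to report failure through the passed context instead of crashing. As a hypothetical illustration of the extra cases the summary suggests exercising (negative index, index past the end, last valid index), here is a GoogleTest sketch against the `copy_index` stand-in above, not the actual prim_ops_test fixture.

```cpp
#include <gtest/gtest.h>

#include <cstdint>
#include <vector>

// Assumes the FlatSlab/copy_index sketch shown earlier is visible here; both
// names are illustration-only and do not exist in ExecuTorch.
TEST(CopyIndexSketchTest, RejectsOutOfBoundsAndAcceptsLastValidIndex) {
  FlatSlab dst;
  dst.dim0 = 2;
  dst.slot_bytes = 4;
  dst.data.assign(dst.dim0 * dst.slot_bytes, 0);

  std::vector<uint8_t> src = {1, 2, 3, 4};

  EXPECT_FALSE(copy_index(dst, src, -1));    // negative index is rejected
  EXPECT_FALSE(copy_index(dst, src, 100));   // index far past the end is rejected
  EXPECT_TRUE(copy_index(dst, src, 1));      // last valid slot (dim0 - 1) succeeds
}
```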
