From 69bfa50645293c8b406a6a46df5245cb7dc1f289 Mon Sep 17 00:00:00 2001
From: Mergen Nachin <mnachin@meta.com>
Date: Fri, 5 Sep 2025 07:19:12 -0700
Subject: [PATCH] Add boolean support for op_unbind_copy (#13956)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Summary:
Fixes https://github.com/pytorch/executorch/issues/13552

🐛 Problem

The unbind_copy kernel crashed with a fatal error when processing boolean tensors:
[op_unbind_copy.cpp:79] assert failed (false): Unhandled dtype Bool for unbind_copy.int_out
zsh: abort

✅ Solution

Extended type support to include boolean tensors by switching from the ET_SWITCH_REALHBF16_TYPES macro to the ET_SWITCH_REALHBBF16_TYPES macro.

📝 Changes

kernels/portable/cpu/op_unbind_copy.cpp:
// Before: Crashed on boolean tensors
ET_SWITCH_REALHBF16_TYPES(...)

// After: Supports boolean tensors
ET_SWITCH_REALHBBF16_TYPES(...)  // Includes Bool type

kernels/test/op_unbind_copy_test.cpp:
- Added a BooleanTensorUnbindDim2 test case that unbinds a torch.bool input of shape (1, 7, 4) along dimension 2
- Validates correct output shape and data integrity

🛡️ Benefits

- No more crashes: Boolean tensors are now properly handled
- Broader compatibility: Supports additional data type for unbind operations
- Regression protection: Test ensures boolean support is maintained

✅ Testing

Verified boolean tensor unbinding works correctly on both portable and optimized kernels with comprehensive test coverage.


Reviewed By: manuelcandales

Differential Revision: D81705165

Pulled By: mergennachin
---
 kernels/portable/cpu/op_unbind_copy.cpp |  4 +-
 kernels/test/op_unbind_copy_test.cpp    | 51 +++++++++++++++++++++++++
 2 files changed, 53 insertions(+), 2 deletions(-)
diff --git a/kernels/portable/cpu/op_unbind_copy.cpp b/kernels/portable/cpu/op_unbind_copy.cpp
index bcf65c673b3..9e99d44b2b0 100644
--- a/kernels/portable/cpu/op_unbind_copy.cpp
+++ b/kernels/portable/cpu/op_unbind_copy.cpp
@@ -55,9 +55,9 @@ void unbind_copy_int_out(
   ScalarType in_type = input.scalar_type();
   ScalarType out_type = out[0].scalar_type();
 
-  ET_SWITCH_REALHBF16_TYPES(
+  ET_SWITCH_REALHBBF16_TYPES(
       in_type, ctx, "unbind_copy.int_out", CTYPE_IN, [&]() {
-        ET_SWITCH_REALHBF16_TYPES(
+        ET_SWITCH_REALHBBF16_TYPES(
             out_type, ctx, "unbind_copy.int_out", CTYPE_OUT, [&]() {
               const CTYPE_IN* const input_data =
                   input.const_data_ptr<CTYPE_IN>();
diff --git a/kernels/test/op_unbind_copy_test.cpp b/kernels/test/op_unbind_copy_test.cpp
index 1dd5c3cebf9..70825537490 100644
--- a/kernels/test/op_unbind_copy_test.cpp
+++ b/kernels/test/op_unbind_copy_test.cpp
@@ -374,3 +374,54 @@ TEST_F(OpUnbindCopyIntOutTest, DynamicShapeUnbound) {
   test_dynamic_shape(
       {1, 1}, torch::executor::TensorShapeDynamism::DYNAMIC_UNBOUND);
 }
+
+TEST_F(OpUnbindCopyIntOutTest, BooleanTensorUnbindDim2) {
+  // Regression test: unbind_copy.int_out on a Bool tensor.
+  // Input: torch.bool tensor of shape (1, 7, 4).
+  // Unbind dimension: 2.
+  TensorFactory<ScalarType::Bool> tf;
+  TensorListFactory<ScalarType::Bool> tlf;
+
+  // Allocate the (1, 7, 4) input zero-filled; values are written below.
+  Tensor input = tf.zeros({1, 7, 4});
+  auto in_data = input.mutable_data_ptr<bool>();
+
+  // Even flat indices hold true, odd flat indices hold false.
+  for (int i = 0; i < 1 * 7 * 4; i++) {
+    in_data[i] = (i % 2) == 0;
+  }
+
+  // Unbinding along dim 2 yields input.size(2) == 4 outputs of shape (1, 7).
+  int64_t unbind_dim = 2;
+  int64_t num_outputs = input.size(unbind_dim); // 4 for this input
+
+  // One zero-filled (1, 7) Bool tensor per expected output.
+  std::vector<Tensor> outputs;
+  for (int i = 0; i < num_outputs; i++) {
+    outputs.push_back(tf.zeros({1, 7}));
+  }
+  TensorList out = tlf.zeros_like(outputs);
+
+  // Run the op under test; Bool previously hit an unhandled-dtype assert.
+  op_unbind_copy_int_out(input, unbind_dim, out);
+
+  // Each output must have shape (1, 7) and match the input slice.
+  for (int output_idx = 0; output_idx < num_outputs; output_idx++) {
+    EXPECT_EQ(out[output_idx].dim(), 2);
+    EXPECT_EQ(out[output_idx].size(0), 1);
+    EXPECT_EQ(out[output_idx].size(1), 7);
+
+    auto out_data = out[output_idx].const_data_ptr<bool>();
+
+    // Output element [i][j] maps to flat input index of [i][j][output_idx].
+    for (int i = 0; i < 1; i++) {
+      for (int j = 0; j < 7; j++) {
+        int input_idx = i * 7 * 4 + j * 4 + output_idx;
+        bool expected = (input_idx % 2) == 0;
+        EXPECT_EQ(out_data[i * 7 + j], expected)
+            << "Mismatch at output[" << output_idx << "][" << i << "][" << j
+            << "]";
+      }
+    }
+  }
+}