Skip to content

Commit 542ea56

Browse files
authored
Fix bug in le broadcast for single element broadcast (#11922)
Summary: Really comparison ops cannot be handled in the same way as other binary ops for broadcasting because output tensor dtype is different than input tensor dtype. As a result we just have to fall back to portable. Even the current vectorized impl for le, ge etc. assumes that the output type of compare is same as input type. That might actually be a bug. A potential way to handle this maybe via vectorized compare natively supporting binary output vector Test Plan: Tests added which fail before and passes after Reviewers: Subscribers: Tasks: Tags: ### Summary [PLEASE REMOVE] See [CONTRIBUTING.md's Pull Requests](https://github.com/pytorch/executorch/blob/main/CONTRIBUTING.md#pull-requests) for ExecuTorch PR guidelines. [PLEASE REMOVE] If this PR closes an issue, please add a `Fixes #<issue-id>` line. [PLEASE REMOVE] If this PR introduces a fix or feature that should be the upcoming release notes, please add a "Release notes: <area>" label. For a list of available release notes labels, check out [CONTRIBUTING.md's Pull Requests](https://github.com/pytorch/executorch/blob/main/CONTRIBUTING.md#pull-requests). ### Test plan [PLEASE REMOVE] How did you test this PR? Please write down any manual commands you used and note down tests that you have written if applicable.
1 parent f1b5947 commit 542ea56

File tree

2 files changed

+67
-49
lines changed

2 files changed

+67
-49
lines changed

kernels/optimized/cpu/op_le.cpp

Lines changed: 0 additions & 49 deletions
Original file line numberDiff line numberDiff line change
@@ -30,57 +30,8 @@ Tensor& opt_le_tensor_out(
3030
(void)ctx;
3131

3232
ScalarType a_type = a.scalar_type();
33-
ScalarType b_type = b.scalar_type();
3433
ScalarType out_type = out.scalar_type();
3534

36-
if (a.numel() == 1 || b.numel() == 1) {
37-
const Tensor* tensor;
38-
const Tensor* scalar;
39-
ScalarType tensor_type;
40-
ScalarType scalar_type;
41-
if (a.numel() == 1) {
42-
tensor = &b;
43-
tensor_type = b_type;
44-
scalar = &a;
45-
scalar_type = a_type;
46-
} else {
47-
tensor = &a;
48-
tensor_type = a_type;
49-
scalar = &b;
50-
scalar_type = b_type;
51-
}
52-
ET_KERNEL_CHECK(
53-
ctx,
54-
resize_to_broadcast_target_size(a, b, out) == Error::Ok,
55-
InvalidArgument,
56-
out);
57-
58-
constexpr auto name = "le.Tensor_out";
59-
60-
ET_SWITCH_REALB_TYPES(tensor_type, ctx, name, CTYPE, [&]() {
61-
ET_SWITCH_REALB_TYPES(scalar_type, ctx, name, CTYPE_SCALAR, [&]() {
62-
CTYPE_SCALAR scalar_val = *scalar->const_data_ptr<CTYPE_SCALAR>();
63-
CTYPE scalar_casted = static_cast<CTYPE>(scalar_val);
64-
65-
using Vec = at::vec::Vectorized<CTYPE>;
66-
if (a.numel() == 1) {
67-
at::vec::map<CTYPE>(
68-
[scalar_casted](Vec x) { return Vec(scalar_casted).le(x); },
69-
out.mutable_data_ptr<CTYPE>(),
70-
tensor->const_data_ptr<CTYPE>(),
71-
out.numel());
72-
} else {
73-
at::vec::map<CTYPE>(
74-
[scalar_casted](Vec x) { return x.le(Vec(scalar_casted)); },
75-
out.mutable_data_ptr<CTYPE>(),
76-
tensor->const_data_ptr<CTYPE>(),
77-
out.numel());
78-
}
79-
});
80-
});
81-
return out;
82-
}
83-
8435
// Check for optimized broadcast paths
8536
auto selected_optimized_path = select_optimized_path(a, b, out);
8637
if (selected_optimized_path == ElementwiseOptimizedPath::kTreatAs1d) {

kernels/test/op_le_test.cpp

Lines changed: 67 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1112,3 +1112,70 @@ TEST_F(OpLeTensorOutTest, Broadcast22dBy1dReverseTest) {
11121112

11131113
EXPECT_TENSOR_EQ(out, tf_bool.make({3, 4}, expected_data));
11141114
}
1115+
1116+
// Regression test for le.Tensor_out broadcasting against a single-element
// tensor: the output dtype (Bool) differs from the input dtype (Int), so the
// single-element fast path must not assume output type == input type.
TEST_F(OpLeTensorOutTest, MonotonicIncreasingVsScalarBroadcastTest) {
  TensorFactory<ScalarType::Int> tf;
  TensorFactory<ScalarType::Bool> tf_bool;

  // lhs is the 1D monotonically increasing tensor [0, 1, 2, ..., 63].
  std::vector<int32_t> lhs_data;
  for (int i = 0; i < 64; ++i) {
    lhs_data.push_back(i);
  }
  Tensor lhs = tf.make({64}, lhs_data);

  // Element type backing a Bool TensorFactory, used to build expected data.
  using ctype =
      executorch::runtime::testing::internal::ScalarTypeToCppTypeWrapper<
          ScalarType::Bool>::ctype;

  // Compare against a single-element 2D rhs {1, 1} for several threshold
  // values; expected[i] == (i <= threshold) in every case.
  for (int32_t threshold : {2, 4, 10, 32}) {
    Tensor rhs = tf.make({1, 1}, {threshold});
    Tensor out = tf_bool.zeros({1, 64});

    op_le_tensor_out(lhs, rhs, out);

    std::vector<ctype> expected_data;
    for (int i = 0; i < 64; ++i) {
      expected_data.push_back(i <= threshold);
    }
    EXPECT_TENSOR_EQ(out, tf_bool.make({1, 64}, expected_data));
  }
}

0 commit comments

Comments
 (0)