Update

swolchok · swolchok · commit 7c6ecd63eb00 · 2025-01-22T09:59:51.000-08:00
[ghstack-poisoned]
diff --git a/kernels/portable/cpu/op_fill.cpp b/kernels/portable/cpu/op_fill.cpp
@@ -42,7 +42,7 @@ Tensor& fill_scalar_out(
       out,
       "Failed to resize output tensor.");
 
-  ET_SWITCH_REAL_TYPES_AND(Bool, a_type, ctx, "fill.Scalar_out", CTYPE_A, [&] {
+  ET_SWITCH_REALHBBF16_TYPES(a_type, ctx, "fill.Scalar_out", CTYPE_A, [&] {
     CTYPE_A b_casted;
     ET_SWITCH_SCALAR_OBJ_TYPES(b_type, ctx, "fill.Scalar_out", CTYPE_B, [&] {
       CTYPE_B b_val;
@@ -87,14 +87,16 @@ Tensor& fill_tensor_out(
       out,
       "Failed to resize output tensor.");
 
-  ET_SWITCH_REAL_TYPES_AND(Bool, a_type, ctx, "fill.Tensor_out", CTYPE_A, [&] {
+  ET_SWITCH_REALHBBF16_TYPES(a_type, ctx, "fill.Tensor_out", CTYPE_A, [&] {
     CTYPE_A b_casted;
-    ET_SWITCH_REAL_TYPES_AND(
-        Bool, b_type, ctx, "fill.Tensor_out", CTYPE_B, [&] {
-          CTYPE_B b_val;
-          extract_scalar_tensor(b, &b_val);
-          b_casted = static_cast<CTYPE_A>(b_val);
-        });
+    ET_SWITCH_REALHBBF16_TYPES(b_type, ctx, "fill.Tensor_out", CTYPE_B, [&] {
+      CTYPE_B b_val;
+      // Extract unconditionally: debug-only checks may compile out their
+      // condition, which would otherwise leave b_val uninitialized.
+      [[maybe_unused]] const bool ok = extract_scalar_tensor(b, &b_val);
+      ET_DCHECK_MSG(ok, "extract_scalar_tensor failed!");
+      b_casted = static_cast<CTYPE_A>(b_val);
+    });
 
     apply_unary_map_fn(
         [b_casted](const CTYPE_A val_a) { return b_casted; },
diff --git a/kernels/portable/cpu/op_gather.cpp b/kernels/portable/cpu/op_gather.cpp
@@ -86,7 +86,7 @@ Tensor& gather_out(
 
   constexpr auto name = "gather.out";
 
-  ET_SWITCH_REALHB_TYPES(in.scalar_type(), ctx, name, CTYPE, [&]() {
+  ET_SWITCH_REALHBBF16_TYPES(in.scalar_type(), ctx, name, CTYPE, [&]() {
     gather_helper<CTYPE>(in, index, out, dim);
   });
 
diff --git a/kernels/portable/cpu/op_leaky_relu.cpp b/kernels/portable/cpu/op_leaky_relu.cpp
@@ -44,7 +44,7 @@ Tensor& leaky_relu_out(
 
   ET_KERNEL_CHECK(ctx, in_type == out_type, InvalidArgument, out);
 
-  ET_SWITCH_FLOAT_TYPES(in_type, ctx, "leaky_relu.out", CTYPE, [&]() {
+  ET_SWITCH_FLOATHBF16_TYPES(in_type, ctx, "leaky_relu.out", CTYPE, [&]() {
     CTYPE negative_slope_casted;
     ET_SWITCH_SCALAR_OBJ_TYPES(
         sc_type, ctx, "leaky_relu.out", CTYPE_MIN, [&]() {
diff --git a/kernels/portable/cpu/op_log_softmax.cpp b/kernels/portable/cpu/op_log_softmax.cpp
@@ -42,7 +42,7 @@ Tensor& log_softmax_out(
   // Adjust for negative dim
   dim = dim < 0 ? dim + nonzero_dim(in) : dim;
 
-  ET_SWITCH_FLOAT_TYPES(
+  ET_SWITCH_FLOATHBF16_TYPES(
       in.scalar_type(), ctx, "_log_softmax.out", CTYPE, [&]() {
         const CTYPE* const in_data = in.const_data_ptr<CTYPE>();
         CTYPE* const out_data = out.mutable_data_ptr<CTYPE>();
diff --git a/kernels/portable/cpu/op_logical_not.cpp b/kernels/portable/cpu/op_logical_not.cpp
@@ -33,10 +33,10 @@ logical_not_out(KernelRuntimeContext& ctx, const Tensor& in, Tensor& out) {
 
   ET_KERNEL_CHECK(ctx, tensors_have_same_shape(in, out), InvalidArgument, out);
 
-  ET_SWITCH_REAL_TYPES_AND(
-      Bool, in.scalar_type(), ctx, "logical_not.out", CTYPE_IN, [&] {
-        ET_SWITCH_REAL_TYPES_AND(
-            Bool, out.scalar_type(), ctx, "logical_not.out", CTYPE_OUT, [&] {
+  ET_SWITCH_REALHBBF16_TYPES(
+      in.scalar_type(), ctx, "logical_not.out", CTYPE_IN, [&] {
+        ET_SWITCH_REALHBBF16_TYPES(
+            out.scalar_type(), ctx, "logical_not.out", CTYPE_OUT, [&] {
               apply_unary_map_fn(
                   [](const CTYPE_IN val_in) {
                     return static_cast<CTYPE_OUT>(!static_cast<bool>(val_in));
diff --git a/kernels/portable/cpu/op_masked_fill.cpp b/kernels/portable/cpu/op_masked_fill.cpp
@@ -42,8 +42,8 @@ Tensor& masked_fill_scalar_out(
   ET_KERNEL_CHECK(
       ctx, tensors_have_same_dim_order(in, mask, out), InvalidArgument, out);
 
-  ET_SWITCH_REAL_TYPES_AND(
-      Bool, in_type, ctx, "masked_fill.Scalar_out", CTYPE, [&]() {
+  ET_SWITCH_REALHBBF16_TYPES(
+      in_type, ctx, "masked_fill.Scalar_out", CTYPE, [&]() {
         ET_SWITCH_REAL_TYPES_AND(
             Bool, val_type, ctx, "masked_fill.Scalar_out", CTYPE_VAL, [&]() {
               CTYPE_VAL value_v;
diff --git a/kernels/portable/cpu/op_max_pool2d_with_indices.cpp b/kernels/portable/cpu/op_max_pool2d_with_indices.cpp
@@ -70,7 +70,7 @@ std::tuple<Tensor&, Tensor&> max_pool2d_with_indices_out(
       ret_val);
 
   ScalarType in_type = in.scalar_type();
-  ET_SWITCH_REAL_TYPES(
+  ET_SWITCH_REALHBF16_TYPES(
       in_type, ctx, "max_pool2d_with_indices.out", CTYPE, [&]() {
         apply_kernel_2d_reduce_then_map_fn<CTYPE>(
             [](const CTYPE in_val,
diff --git a/kernels/portable/cpu/op_mean.cpp b/kernels/portable/cpu/op_mean.cpp
@@ -44,23 +44,24 @@ Tensor& mean_dim_out(
       InvalidArgument,
       out);
 
-  ET_SWITCH_REALHB_TYPES(in.scalar_type(), ctx, "mean.out", CTYPE_IN, [&] {
-    ET_SWITCH_FLOATH_TYPES(out.scalar_type(), ctx, "mean.out", CTYPE_OUT, [&] {
-      CTYPE_OUT* out_data = out.mutable_data_ptr<CTYPE_OUT>();
-      const size_t num = get_reduced_dim_product(in, dim_list);
-      for (size_t out_ix = 0; out_ix < out.numel(); ++out_ix) {
-        CTYPE_OUT sum = 0;
-        if (in.numel() > 0) {
-          sum = map_reduce_over_dim_list<CTYPE_IN, CTYPE_OUT>(
-              [](CTYPE_IN v) { return static_cast<CTYPE_OUT>(v); },
-              [](CTYPE_OUT outv, CTYPE_OUT acc) { return acc + outv; },
-              in,
-              dim_list,
-              out_ix);
-        }
-        out_data[out_ix] = sum / static_cast<float>(num);
-      }
-    });
+  ET_SWITCH_REALHBBF16_TYPES(in.scalar_type(), ctx, "mean.out", CTYPE_IN, [&] {
+    ET_SWITCH_FLOATHBF16_TYPES(
+        out.scalar_type(), ctx, "mean.out", CTYPE_OUT, [&] {
+          CTYPE_OUT* out_data = out.mutable_data_ptr<CTYPE_OUT>();
+          const size_t num = get_reduced_dim_product(in, dim_list);
+          for (size_t out_ix = 0; out_ix < out.numel(); ++out_ix) {
+            CTYPE_OUT sum = 0;
+            if (in.numel() > 0) {
+              sum = map_reduce_over_dim_list<CTYPE_IN, CTYPE_OUT>(
+                  [](CTYPE_IN v) { return static_cast<CTYPE_OUT>(v); },
+                  [](CTYPE_OUT outv, CTYPE_OUT acc) { return acc + outv; },
+                  in,
+                  dim_list,
+                  out_ix);
+            }
+            out_data[out_ix] = sum / static_cast<float>(num);
+          }
+        });
   });
 
   return out;
diff --git a/kernels/test/CMakeLists.txt b/kernels/test/CMakeLists.txt
@@ -139,6 +139,7 @@ set(all_test_sources
     "op_fmod_test.cpp"
     "op_full_like_test.cpp"
     "op_full_test.cpp"
+    "op_gather_test.cpp"
     "op_ge_test.cpp"
     "op_gelu_test.cpp"
     "op_glu_test.cpp"
diff --git a/kernels/test/op_fill_test.cpp b/kernels/test/op_fill_test.cpp
@@ -92,15 +92,15 @@ class OpFillTest : public OperatorTest {
     TEST_FILL_OUT(test_fill_scalar_out, DTYPE);      \
   }
 
-ET_FORALL_REAL_TYPES_AND(Bool, GENERATE_SCALAR_INPUT_SUPPORT_TEST)
+ET_FORALL_REALHBBF16_TYPES(GENERATE_SCALAR_INPUT_SUPPORT_TEST)
 
 // Create input support tests for tensor variant.
 #define GENERATE_TENSOR_INPUT_SUPPORT_TEST(_, DTYPE) \
   TEST_F(OpFillTest, DTYPE##TensorInputSupport) {    \
     TEST_FILL_OUT(test_fill_tensor_out, DTYPE);      \
   }
 
-ET_FORALL_REAL_TYPES_AND(Bool, GENERATE_TENSOR_INPUT_SUPPORT_TEST)
+ET_FORALL_REALHBBF16_TYPES(GENERATE_TENSOR_INPUT_SUPPORT_TEST)
 
 TEST_F(OpFillTest, MismatchedOtherPropertiesDies) {
   TensorFactory<ScalarType::Int> tf;
diff --git a/kernels/test/op_gather_test.cpp b/kernels/test/op_gather_test.cpp
@@ -194,7 +194,7 @@ class OpGatherOutTest : public OperatorTest {
 
 TEST_F(OpGatherOutTest, AllValidInputOutputSupport) {
 #define TEST_ENTRY(CTYPE, DTYPE) test_gather_out<ScalarType::DTYPE>();
-  ET_FORALL_REAL_TYPES(TEST_ENTRY);
+  ET_FORALL_REALHBF16_TYPES(TEST_ENTRY);
 #undef TEST_ENTRY
 }
 
diff --git a/kernels/test/op_leaky_relu_test.cpp b/kernels/test/op_leaky_relu_test.cpp
@@ -29,15 +29,21 @@ class OpLeakyReluTest : public OperatorTest {
     return torch::executor::aten::leaky_relu_outf(
         context_, in, negative_slope, out);
   }
-};
+  template <ScalarType DTYPE>
+  void test_leaky_relu_dtype() {
+    TensorFactory<DTYPE> tf;
+    Tensor in = tf.ones({2, 2});
+    Tensor out = tf.zeros({2, 2});
 
-TEST_F(OpLeakyReluTest, SanityCheck) {
-  TensorFactory<ScalarType::Float> tf;
-  Tensor in = tf.ones({2, 2});
-  Tensor out = tf.zeros({2, 2});
+    Tensor ret = op_leaky_relu_out(in, -0.01, out);
 
-  Tensor ret = op_leaky_relu_out(in, -0.01, out);
+    EXPECT_TENSOR_EQ(out, ret);
+    EXPECT_TENSOR_EQ(out, tf.ones({2, 2}));
+  }
+};
 
-  EXPECT_TENSOR_EQ(out, ret);
-  EXPECT_TENSOR_EQ(out, tf.ones({2, 2}));
+TEST_F(OpLeakyReluTest, SanityCheck) {
+#define TEST_ENTRY(ctype, dtype) test_leaky_relu_dtype<ScalarType::dtype>();
+  ET_FORALL_FLOATHBF16_TYPES(TEST_ENTRY);
+#undef TEST_ENTRY
 }
diff --git a/kernels/test/op_log_softmax_test.cpp b/kernels/test/op_log_softmax_test.cpp
@@ -62,7 +62,15 @@ class OpLogSoftmaxOutTest : public OperatorTest {
       });
     // clang-format on
 
-    EXPECT_TENSOR_CLOSE(out, expected);
+    if constexpr (DTYPE == ScalarType::BFloat16) {
+      EXPECT_TENSOR_CLOSE_WITH_TOL(
+          out,
+          expected,
+          1e-2,
+          executorch::runtime::testing::internal::kDefaultAtol);
+    } else {
+      EXPECT_TENSOR_CLOSE(out, expected);
+    }
   }
 };
 
@@ -88,11 +96,9 @@ TEST_F(OpLogSoftmaxOutTest, AllDtypesSupported) {
     GTEST_SKIP() << "This kernel does not support dtype double";
   }
 
-  test_dtype<float, ScalarType::Float>();
-  test_dtype<double, ScalarType::Double>();
-  // TODO: Also add tests for half, complex, quantized, and other types. Easiest
-  // way to do that would be to make TensorFactory support zeros() and ones()
-  // for those types.
+#define TEST_ENTRY(ctype, dtype) test_dtype<ctype, ScalarType::dtype>();
+  ET_FORALL_FLOATHBF16_TYPES(TEST_ENTRY)
+#undef TEST_ENTRY
 }
 
 TEST_F(OpLogSoftmaxOutTest, MismatchedDimensionsDies) {
diff --git a/kernels/test/op_logical_not_test.cpp b/kernels/test/op_logical_not_test.cpp
@@ -122,9 +122,9 @@ TEST_F(OpLogicalNotOutTest, AllTypePasses) {
   test_logical_not_out<ScalarType::INPUT_DTYPE, ScalarType::OUTPUT_DTYPE>();
 
 #define TEST_ENTRY(INPUT_CTYPE, INPUT_DTYPE) \
-  ET_FORALL_REAL_TYPES_WITH2(INPUT_CTYPE, INPUT_DTYPE, TEST_KERNEL);
+  ET_FORALL_REALHBBF16_TYPES_WITH2(INPUT_CTYPE, INPUT_DTYPE, TEST_KERNEL);
 
-  ET_FORALL_REAL_TYPES(TEST_ENTRY);
+  ET_FORALL_REALHBBF16_TYPES(TEST_ENTRY);
 #undef TEST_ENTRY
 #undef TEST_KERNEL
 }
diff --git a/kernels/test/op_masked_fill_test.cpp b/kernels/test/op_masked_fill_test.cpp
@@ -114,8 +114,11 @@ TEST_F(OpMaskedFillTest, IntTensorFloatAlphaDies) {
           tf.ones(sizes), tf.ones(sizes), /*alpha=*/.7, out));
 }
 
-TEST_F(OpMaskedFillTest, FloatTensors) {
-  test_floating_point_masked_fill_scalar_out<ScalarType::Float>();
+TEST_F(OpMaskedFillTest, FloatingPointTensors) {
+#define TEST_ENTRY(ctype, dtype) \
+  test_floating_point_masked_fill_scalar_out<ScalarType::dtype>();
+  ET_FORALL_FLOATHBF16_TYPES(TEST_ENTRY);
+#undef TEST_ENTRY
 }
 
 TEST_F(OpMaskedFillTest, DoubleTensors) {
diff --git a/kernels/test/op_max_pool2d_with_indices_test.cpp b/kernels/test/op_max_pool2d_with_indices_test.cpp
diff --git a/kernels/test/op_mean_test.cpp b/kernels/test/op_mean_test.cpp
diff --git a/runtime/core/exec_aten/util/scalar_type_util.h b/runtime/core/exec_aten/util/scalar_type_util.h
diff --git a/runtime/core/exec_aten/util/tensor_util.h b/runtime/core/exec_aten/util/tensor_util.h

Original file line number	Diff line number	Diff line change
`@@ -92,15 +92,15 @@ class OpFillTest : public OperatorTest {`
`92`	`92`	`TEST_FILL_OUT(test_fill_scalar_out, DTYPE); \`
`93`	`93`	`}`
`94`	`94`
`95`		`-ET_FORALL_REAL_TYPES_AND(Bool, GENERATE_SCALAR_INPUT_SUPPORT_TEST)`
	`95`	`+ET_FORALL_REALHBBF16_TYPES(GENERATE_SCALAR_INPUT_SUPPORT_TEST)`
`96`	`96`
`97`	`97`	`// Create input support tests for tensor variant.`
`98`	`98`	`#define GENERATE_TENSOR_INPUT_SUPPORT_TEST(_, DTYPE) \`
`99`	`99`	`TEST_F(OpFillTest, DTYPE##TensorInputSupport) { \`
`100`	`100`	`TEST_FILL_OUT(test_fill_tensor_out, DTYPE); \`
`101`	`101`	`}`
`102`	`102`
`103`		`-ET_FORALL_REAL_TYPES_AND(Bool, GENERATE_TENSOR_INPUT_SUPPORT_TEST)`
	`103`	`+ET_FORALL_REALHBBF16_TYPES(GENERATE_TENSOR_INPUT_SUPPORT_TEST)`
`104`	`104`
`105`	`105`	`TEST_F(OpFillTest, MismatchedOtherPropertiesDies) {`
`106`	`106`	`TensorFactory<ScalarType::Int> tf;`