
Commit ff4c4a4

manuelcandales authored and facebook-github-bot committed
Add ops: max.unary_out & min.unary_out
Differential Revision: D64986580
1 parent 8f9fb7e commit ff4c4a4

File tree: 8 files changed, +232 −8 lines

kernels/aten/functions.yaml
kernels/portable/cpu/op_max.cpp
kernels/portable/cpu/op_min.cpp
kernels/portable/cpu/util/math_util.h
kernels/portable/functions.yaml
kernels/test/op_max_test.cpp
kernels/test/op_min_test.cpp
shim/xplat/executorch/kernels/portable/op_registration_util.bzl
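Background, not part of the commit page: max.unary_out and min.unary_out are the out-variant overloads of the whole-tensor reductions (torch.max(x) / torch.min(x) with no dim argument); they collapse every element of the input into a single 0-D output tensor. A minimal sketch of that semantics in plain C++, as an illustration under my own assumptions rather than the ExecuTorch kernel; it also ignores the NaN propagation that utils::max_override adds.

#include <algorithm>
#include <cstdio>
#include <limits>
#include <vector>

// Sketch only: whole-tensor max seeded with -infinity, so an empty input
// yields -inf (the same identity the new kernel uses for floating types).
// The real kernel also propagates NaN via utils::max_override; std::max does not.
float max_all(const std::vector<float>& in) {
  float acc = -std::numeric_limits<float>::infinity();
  for (float v : in) {
    acc = std::max(acc, v);
  }
  return acc;
}

int main() {
  std::printf("%f\n", max_all({0, 1, 2, 4, 4, 2}));  // 4, as in the new tests
  std::printf("%f\n", max_all({}));                  // -inf for an empty input
  return 0;
}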

kernels/aten/functions.yaml

Lines changed: 4 additions & 0 deletions
@@ -249,12 +249,16 @@
 
 - op: max.dim_max
 
+- op: max.unary_out
+
 - op: maximum.out
 
 - op: mean.out
 
 - op: min.dim_min
 
+- op: min.unary_out
+
 - op: minimum.out
 
 - op: mm.out
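For reference, and as an assumption to verify against the pinned PyTorch revision rather than something stated in this diff, the ATen schemas these two entries point at should be the dim-less out variants:

max.unary_out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
min.unary_out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)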

kernels/portable/cpu/op_max.cpp

Lines changed: 45 additions & 1 deletion
@@ -9,14 +9,23 @@
 #include <cmath>
 #include <tuple>
 
-#include <executorch/kernels/portable/cpu/util/index_util.h>
 #include <executorch/kernels/portable/cpu/util/reduce_util.h>
+#include <executorch/kernels/portable/cpu/util/math_util.h>
 #include <executorch/runtime/kernel/kernel_includes.h>
 #include <executorch/runtime/platform/assert.h>
 
 namespace torch {
 namespace executor {
 namespace native {
+namespace {
+
+template <typename CTYPE>
+constexpr CTYPE lower_bound() {
+  using lim = std::numeric_limits<CTYPE>;
+  return lim::has_infinity ? -lim::infinity() : lim::lowest();
+}
+
+} // namespace
 
 using ScalarType = exec_aten::ScalarType;
 using SizesType = exec_aten::SizesType;

@@ -94,6 +103,41 @@ std::tuple<Tensor&, Tensor&> max_out(
   return {max, max_indices};
 }
 
+Tensor& max_unary_out(
+    KernelRuntimeContext& ctx,
+    const Tensor& in,
+    Tensor& out) {
+  (void)ctx;
+
+  ET_KERNEL_CHECK(
+      ctx, resize_tensor(out, {}) == Error::Ok, InvalidArgument, out);
+
+  ET_KERNEL_CHECK(
+      ctx, tensors_have_same_dim_order(in, out), InvalidArgument, out);
+
+  ScalarType in_type = in.scalar_type();
+  ScalarType out_type = out.scalar_type();
+
+  ET_KERNEL_CHECK(
+      ctx, canCast(in_type, out_type), InvalidArgument, out);
+
+  constexpr auto name = "max.unary_out";
+
+  ET_SWITCH_REALHBBF16_TYPES(in_type, ctx, name, CTYPE_IN, [&] {
+    ET_SWITCH_REALHBBF16_TYPES(out_type, ctx, name, CTYPE_OUT, [&] {
+      const auto data_in = in.const_data_ptr<CTYPE_IN>();
+      auto data_out = out.mutable_data_ptr<CTYPE_OUT>();
+      data_out[0] = lower_bound<CTYPE_OUT>();
+      for (auto i = 0; i < in.numel(); ++i) {
+        data_out[0] = utils::max_override(
+            static_cast<CTYPE_OUT>(data_in[i]), data_out[0]);
+      }
+    });
+  });
+
+  return out;
+}
+
 } // namespace native
 } // namespace executor
 } // namespace torch
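The kernel body above is a straightforward full-tensor fold: the output is resized to a 0-D tensor, seeded with lower_bound<CTYPE_OUT>() (−inf for floating types, numeric_limits::lowest() otherwise), and folded with utils::max_override, which propagates NaN. A standalone sketch of the same fold using only the standard library follows; the names are mine and it is an illustration, not ExecuTorch code.

#include <cmath>
#include <cstddef>
#include <limits>

// Same identity rule as the lower_bound() helper in the diff: -inf when the
// type has an infinity, otherwise the lowest finite value.
template <typename T>
constexpr T lower_bound() {
  using lim = std::numeric_limits<T>;
  return lim::has_infinity ? -lim::infinity() : lim::lowest();
}

// NaN-propagating max, mirroring what utils::max_override does for
// floating-point types (integer types reduce to a plain comparison).
template <typename T>
T max_propagate_nan(T a, T b) {
  if (std::isnan(static_cast<double>(a))) {
    return a;
  }
  if (std::isnan(static_cast<double>(b))) {
    return b;
  }
  return a > b ? a : b;
}

// Reduce n input elements to one output value; n == 0 leaves the identity,
// which is what the empty-input tests below rely on.
template <typename OUT, typename IN>
OUT reduce_max(const IN* data, std::size_t n) {
  OUT acc = lower_bound<OUT>();
  for (std::size_t i = 0; i < n; ++i) {
    acc = max_propagate_nan(static_cast<OUT>(data[i]), acc);
  }
  return acc;
}

int main() {
  const int data[] = {0, 1, 2, 4, 4, 2};
  return reduce_max<float>(data, 6) == 4.0f ? 0 : 1;  // int input, float output
}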

kernels/portable/cpu/op_min.cpp

Lines changed: 45 additions & 1 deletion
@@ -9,14 +9,23 @@
 #include <cmath>
 #include <tuple>
 
-#include <executorch/kernels/portable/cpu/util/index_util.h>
 #include <executorch/kernels/portable/cpu/util/reduce_util.h>
+#include <executorch/kernels/portable/cpu/util/math_util.h>
 #include <executorch/runtime/kernel/kernel_includes.h>
 #include <executorch/runtime/platform/assert.h>
 
 namespace torch {
 namespace executor {
 namespace native {
+namespace {
+
+template <typename CTYPE>
+constexpr CTYPE upper_bound() {
+  using lim = std::numeric_limits<CTYPE>;
+  return lim::has_infinity ? lim::infinity() : lim::max();
+}
+
+} // namespace
 
 using ScalarType = exec_aten::ScalarType;
 using SizesType = exec_aten::SizesType;

@@ -94,6 +103,41 @@ std::tuple<Tensor&, Tensor&> min_out(
   return {min, min_indices};
 }
 
+Tensor& min_unary_out(
+    KernelRuntimeContext& ctx,
+    const Tensor& in,
+    Tensor& out) {
+  (void)ctx;
+
+  ET_KERNEL_CHECK(
+      ctx, resize_tensor(out, {}) == Error::Ok, InvalidArgument, out);
+
+  ET_KERNEL_CHECK(
+      ctx, tensors_have_same_dim_order(in, out), InvalidArgument, out);
+
+  ScalarType in_type = in.scalar_type();
+  ScalarType out_type = out.scalar_type();
+
+  ET_KERNEL_CHECK(
+      ctx, canCast(in_type, out_type), InvalidArgument, out);
+
+  constexpr auto name = "min.unary_out";
+
+  ET_SWITCH_REALHBBF16_TYPES(in_type, ctx, name, CTYPE_IN, [&] {
+    ET_SWITCH_REALHBBF16_TYPES(out_type, ctx, name, CTYPE_OUT, [&] {
+      const auto data_in = in.const_data_ptr<CTYPE_IN>();
+      auto data_out = out.mutable_data_ptr<CTYPE_OUT>();
+      data_out[0] = upper_bound<CTYPE_OUT>();
+      for (auto i = 0; i < in.numel(); ++i) {
+        data_out[0] = utils::min_override(
+            static_cast<CTYPE_OUT>(data_in[i]), data_out[0]);
+      }
+    });
+  });
+
+  return out;
+}
+
 } // namespace native
 } // namespace executor
 } // namespace torch
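min_unary_out is the mirror image: upper_bound() supplies the identity (+inf for floating types, numeric_limits::max() otherwise) and utils::min_override performs the fold, so an empty input yields +inf or the type's maximum, which is exactly what the new op_min_test cases assert. A couple of compile-time checks of that identity, sketched under my own naming rather than the ExecuTorch header:

#include <cstdint>
#include <limits>

// Same shape as the upper_bound() helper added in this file (sketch only).
template <typename T>
constexpr T upper_bound() {
  using lim = std::numeric_limits<T>;
  return lim::has_infinity ? lim::infinity() : lim::max();
}

// Identity of min: every representable value compared against it is <= it.
static_assert(
    upper_bound<int32_t>() == std::numeric_limits<int32_t>::max(),
    "integral types fall back to numeric_limits::max()");
static_assert(
    upper_bound<float>() == std::numeric_limits<float>::infinity(),
    "floating types use +infinity");

int main() {
  return 0;
}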

kernels/portable/cpu/util/math_util.h

Lines changed: 8 additions & 4 deletions
@@ -96,8 +96,10 @@ INT_T max_override(INT_T a, INT_T b) {
 
 template <
     typename T,
-    typename std::enable_if<std::is_same<T, exec_aten::Half>::value, bool>::
-        type = true>
+    typename std::enable_if<
+        std::is_same<T, exec_aten::Half>::value ||
+            std::is_same<T, exec_aten::BFloat16>::value,
+        bool>::type = true>
 T min_override(T a, T b) {
   const auto float_a = static_cast<float>(a);
   if (std::isnan(float_a)) {

@@ -116,8 +118,10 @@ T min_override(T a, T b) {
 
 template <
     typename T,
-    typename std::enable_if<std::is_same<T, exec_aten::Half>::value, bool>::
-        type = true>
+    typename std::enable_if<
+        std::is_same<T, exec_aten::Half>::value ||
+            std::is_same<T, exec_aten::BFloat16>::value,
+        bool>::type = true>
 T max_override(T a, T b) {
   const auto float_a = static_cast<float>(a);
   if (std::isnan(float_a)) {
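The header change widens the reduced-precision overloads of min_override / max_override from Half only to Half or BFloat16, which is what lets the new kernels iterate under ET_SWITCH_REALHBBF16_TYPES. Those overloads upcast to float so NaN is detected and propagated before the comparison. A minimal reproduction of that dispatch pattern with a stand-in type follows; FakeBF16 and the overload set are my own illustration, not the ExecuTorch names.

#include <cmath>
#include <cstdio>
#include <type_traits>

// Stand-in 16-bit-ish type; the real code uses exec_aten::Half / exec_aten::BFloat16.
struct FakeBF16 {
  float value;
  explicit operator float() const { return value; }
};

// Overload for ordinary arithmetic floating types.
template <
    typename T,
    typename std::enable_if<std::is_floating_point<T>::value, bool>::type = true>
T min_override(T a, T b) {
  if (std::isnan(a)) {
    return a;
  }
  if (std::isnan(b)) {
    return b;
  }
  return a < b ? a : b;
}

// Overload selected for the reduced-precision type: upcast to float, decide
// there, return the original operand (mirrors the Half/BFloat16 branch).
template <
    typename T,
    typename std::enable_if<std::is_same<T, FakeBF16>::value, bool>::type = true>
T min_override(T a, T b) {
  const float fa = static_cast<float>(a);
  const float fb = static_cast<float>(b);
  if (std::isnan(fa)) {
    return a;
  }
  if (std::isnan(fb)) {
    return b;
  }
  return fa < fb ? a : b;
}

int main() {
  FakeBF16 x{2.0f}, y{NAN};
  std::printf("%f\n", static_cast<float>(min_override(x, y)));  // nan propagates
  std::printf("%f\n", min_override(1.0, 3.0));                  // 1.000000
  return 0;
}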

kernels/portable/functions.yaml

Lines changed: 10 additions & 0 deletions
@@ -552,6 +552,11 @@
     - arg_meta: null
       kernel_name: torch::executor::max_out
 
+- op: max.unary_out
+  kernels:
+    - arg_meta: null
+      kernel_name: torch::executor::max_unary_out
+
 - op: maximum.out
   kernels:
     - arg_meta: null

@@ -572,6 +577,11 @@
     - arg_meta: null
       kernel_name: torch::executor::min_out
 
+- op: min.unary_out
+  kernels:
+    - arg_meta: null
+      kernel_name: torch::executor::min_unary_out
+
 - op: minimum.out
   kernels:
     - arg_meta: null

kernels/test/op_max_test.cpp

Lines changed: 59 additions & 0 deletions
@@ -222,6 +222,65 @@ void OpMaxOutTest::test_max_out_dtype<ScalarType::Bool>() {
   // clang-format on
 }
 
+class OpMaxUnaryOutTest : public OperatorTest {
+ protected:
+  Tensor& op_max_unary_out(
+      const Tensor& self,
+      Tensor& out) {
+    return torch::executor::aten::max_outf(
+        context_, self, out);
+  }
+
+  template <ScalarType IN_DTYPE>
+  void test_max_unary_out_dtype() {
+    TensorFactory<IN_DTYPE> tf_in;
+    TensorFactory<ScalarType::Float> tf_out;
+    Tensor input = tf_in.make({2, 3}, {0, 1, 2, 4, 4, 2});
+    Tensor out = tf_out.zeros({});
+    Tensor expected = tf_out.make({}, {4});
+    op_max_unary_out(input, out);
+    EXPECT_TENSOR_CLOSE(out, expected);
+  }
+
+  template <typename CTYPE, ScalarType IN_DTYPE>
+  void test_max_unary_out_empty_integer() {
+    TensorFactory<IN_DTYPE> tf_in;
+    Tensor input = tf_in.make({2, 0}, {});
+    Tensor out = tf_in.zeros({});
+    Tensor expected = tf_in.make({}, {std::numeric_limits<CTYPE>::lowest()});
+    op_max_unary_out(input, out);
+    EXPECT_TENSOR_CLOSE(out, expected);
+  }
+
+  template <typename CTYPE, ScalarType IN_DTYPE>
+  void test_max_unary_out_empty_floating() {
+    TensorFactory<IN_DTYPE> tf_in;
+    Tensor input = tf_in.make({2, 0}, {});
+    Tensor out = tf_in.zeros({});
+    Tensor expected = tf_in.make({}, {-INFINITY});
+    op_max_unary_out(input, out);
+    EXPECT_TENSOR_CLOSE(out, expected);
+  }
+};
+
+TEST_F(OpMaxUnaryOutTest, AllRealHBF16InputFloatOutputPasses) {
+#define TEST_ENTRY(ctype, dtype) test_max_unary_out_dtype<ScalarType::dtype>();
+  ET_FORALL_REALHBF16_TYPES(TEST_ENTRY);
+#undef TEST_ENTRY
+}
+
+TEST_F(OpMaxUnaryOutTest, EmptyIntegerInput) {
+#define TEST_ENTRY(ctype, dtype) test_max_unary_out_empty_integer<ctype, ScalarType::dtype>();
+  ET_FORALL_INT_TYPES(TEST_ENTRY);
+#undef TEST_ENTRY
+}
+
+TEST_F(OpMaxUnaryOutTest, EmptyFloatingInput) {
+#define TEST_ENTRY(ctype, dtype) test_max_unary_out_empty_floating<ctype, ScalarType::dtype>();
+  ET_FORALL_FLOATHBF16_TYPES(TEST_ENTRY);
+#undef TEST_ENTRY
+}
+
 TEST_F(OpMaxOutTest, MismatchedDimensionsDies) {
   if (torch::executor::testing::SupportedFeatures::get()->is_aten) {
     GTEST_SKIP() << "ATen kernel test fails";
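The suite above covers every REALHBF16 input dtype plus the empty-input identities. As a further illustration of why the accumulator starts at lower_bound() rather than zero, here is a hypothetical extra case in the same style, reusing the OpMaxUnaryOutTest helpers defined in the diff; it is not part of the commit.

TEST_F(OpMaxUnaryOutTest, AllNegativeIntegerInput) {
  TensorFactory<ScalarType::Int> tf_in;
  TensorFactory<ScalarType::Float> tf_out;
  Tensor input = tf_in.make({2, 2}, {-7, -1, -3, -4});
  Tensor out = tf_out.zeros({});
  // The true maximum is -1; seeding the reduction with 0 instead of
  // lower_bound() would wrongly report 0 here.
  Tensor expected = tf_out.make({}, {-1});
  op_max_unary_out(input, out);
  EXPECT_TENSOR_CLOSE(out, expected);
}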

kernels/test/op_min_test.cpp

Lines changed: 59 additions & 0 deletions
@@ -218,6 +218,65 @@ EXPECT_TENSOR_EQ(min_indices, tf_long.make(
   // clang-format on
 }
 
+class OpMinUnaryOutTest : public OperatorTest {
+ protected:
+  Tensor& op_min_unary_out(
+      const Tensor& self,
+      Tensor& out) {
+    return torch::executor::aten::min_outf(
+        context_, self, out);
+  }
+
+  template <ScalarType IN_DTYPE>
+  void test_min_unary_out_dtype() {
+    TensorFactory<IN_DTYPE> tf_in;
+    TensorFactory<ScalarType::Float> tf_out;
+    Tensor input = tf_in.make({2, 3}, {7, 1, 3, 4, 4, 2});
+    Tensor out = tf_out.zeros({});
+    Tensor expected = tf_out.make({}, {1});
+    op_min_unary_out(input, out);
+    EXPECT_TENSOR_CLOSE(out, expected);
+  }
+
+  template <typename CTYPE, ScalarType IN_DTYPE>
+  void test_min_unary_out_empty_integer() {
+    TensorFactory<IN_DTYPE> tf_in;
+    Tensor input = tf_in.make({2, 0}, {});
+    Tensor out = tf_in.zeros({});
+    Tensor expected = tf_in.make({}, {std::numeric_limits<CTYPE>::max()});
+    op_min_unary_out(input, out);
+    EXPECT_TENSOR_CLOSE(out, expected);
+  }
+
+  template <typename CTYPE, ScalarType IN_DTYPE>
+  void test_min_unary_out_empty_floating() {
+    TensorFactory<IN_DTYPE> tf_in;
+    Tensor input = tf_in.make({2, 0}, {});
+    Tensor out = tf_in.zeros({});
+    Tensor expected = tf_in.make({}, {INFINITY});
+    op_min_unary_out(input, out);
+    EXPECT_TENSOR_CLOSE(out, expected);
+  }
+};
+
+TEST_F(OpMinUnaryOutTest, AllRealHBF16InputFloatOutputPasses) {
+#define TEST_ENTRY(ctype, dtype) test_min_unary_out_dtype<ScalarType::dtype>();
+  ET_FORALL_REALHBF16_TYPES(TEST_ENTRY);
+#undef TEST_ENTRY
+}
+
+TEST_F(OpMinUnaryOutTest, EmptyIntegerInput) {
+#define TEST_ENTRY(ctype, dtype) test_min_unary_out_empty_integer<ctype, ScalarType::dtype>();
+  ET_FORALL_INT_TYPES(TEST_ENTRY);
+#undef TEST_ENTRY
+}
+
+TEST_F(OpMinUnaryOutTest, EmptyFloatingInput) {
+#define TEST_ENTRY(ctype, dtype) test_min_unary_out_empty_floating<ctype, ScalarType::dtype>();
+  ET_FORALL_FLOATHBF16_TYPES(TEST_ENTRY);
+#undef TEST_ENTRY
+}
+
 TEST_F(OpMinOutTest, MismatchedDimensionsDies) {
   if (torch::executor::testing::SupportedFeatures::get()->is_aten) {
     GTEST_SKIP() << "ATen kernel test fails";

shim/xplat/executorch/kernels/portable/op_registration_util.bzl

Lines changed: 2 additions & 2 deletions
@@ -787,7 +787,7 @@ ATEN_OPS = (
         deps = [
             "//executorch/runtime/core/exec_aten/util:scalar_type_util",
             "//executorch/runtime/core/exec_aten/util:tensor_util",
-            "//executorch/kernels/portable/cpu/util:index_util",
+            "//executorch/kernels/portable/cpu/util:math_util",
             "//executorch/kernels/portable/cpu/util:reduce_util",
         ],
     ),

@@ -821,7 +821,7 @@ ATEN_OPS = (
         deps = [
             "//executorch/runtime/core/exec_aten/util:scalar_type_util",
             "//executorch/runtime/core/exec_aten/util:tensor_util",
-            "//executorch/kernels/portable/cpu/util:index_util",
+            "//executorch/kernels/portable/cpu/util:math_util",
             "//executorch/kernels/portable/cpu/util:reduce_util",
         ],
     ),
