[EE/BE] add float variant to unary_ufunc_realhbf16 (#12290)

pytorchbot · manuelcandales · web-flow · commit 31333b196d13 · 2025-07-09T12:06:04.000-04:00
This PR was created by the merge bot to help merge the original PR into the main branch. ghstack PR number: #12278 by @manuelcandales ^ Please use this as the source of truth for the PR details, comments, and reviews ghstack PR base: https://github.com/pytorch/executorch/tree/gh/manuelcandales/131/base ghstack PR head: https://github.com/pytorch/executorch/tree/gh/manuelcandales/131/head Merge bot PR base: https://github.com/pytorch/executorch/tree/gh/manuelcandales/130/orig Merge bot PR head: https://github.com/pytorch/executorch/tree/gh/manuelcandales/131/orig @diff-train-skip-merge --------- Co-authored-by: Manuel Candales <manuelcandales@gmail.com>
diff --git a/kernels/portable/cpu/op_ceil.cpp b/kernels/portable/cpu/op_ceil.cpp
@@ -16,9 +16,7 @@ namespace native {
 
 using executorch::aten::Tensor;
 
-Tensor& ceil_out(KernelRuntimeContext& ctx, const Tensor& in, Tensor& out) {
-  return internal::unary_ufunc_realhbf16(std::ceil, ctx, in, out);
-}
+DEFINE_UNARY_UFUNC_REALHBF16(ceil_out, std::ceil)
 
 } // namespace native
 } // namespace executor
diff --git a/kernels/portable/cpu/op_floor.cpp b/kernels/portable/cpu/op_floor.cpp
@@ -16,9 +16,7 @@ namespace native {
 
 using executorch::aten::Tensor;
 
-Tensor& floor_out(KernelRuntimeContext& ctx, const Tensor& in, Tensor& out) {
-  return internal::unary_ufunc_realhbf16(std::floor, ctx, in, out);
-}
+DEFINE_UNARY_UFUNC_REALHBF16(floor_out, std::floor)
 
 } // namespace native
 } // namespace executor
diff --git a/kernels/portable/cpu/op_trunc.cpp b/kernels/portable/cpu/op_trunc.cpp
@@ -14,9 +14,7 @@ namespace torch {
 namespace executor {
 namespace native {
 
-Tensor& trunc_out(KernelRuntimeContext& ctx, const Tensor& in, Tensor& out) {
-  return internal::unary_ufunc_realhbf16(std::trunc, ctx, in, out);
-}
+DEFINE_UNARY_UFUNC_REALHBF16(trunc_out, std::trunc)
 
 } // namespace native
 } // namespace executor
diff --git a/kernels/portable/cpu/pattern/pattern.h b/kernels/portable/cpu/pattern/pattern.h
@@ -60,11 +60,17 @@ namespace internal {
  * the input tensor element-wise.
  */
 Tensor& unary_ufunc_realhbf16(
-    double (*fn)(double),
+    float (*fn_float)(float),
+    double (*fn_double)(double),
     KernelRuntimeContext& ctx,
     const Tensor& in,
     Tensor& out);
 
+#define DEFINE_UNARY_UFUNC_REALHBF16(op_name, fn)                             \
+  Tensor& op_name(KernelRuntimeContext& ctx, const Tensor& in, Tensor& out) { \
+    return internal::unary_ufunc_realhbf16(fn, fn, ctx, in, out);             \
+  }
+
 /**
  * Implements an op pattern for ops that take a single input tensor of any
  * realhb dtye (real, half and boolean), no additional arguments, and outputs a
diff --git a/kernels/portable/cpu/pattern/unary_ufunc_realhbf16.cpp b/kernels/portable/cpu/pattern/unary_ufunc_realhbf16.cpp
@@ -16,12 +16,11 @@ namespace native {
 namespace internal {
 
 Tensor& unary_ufunc_realhbf16(
-    double (*fn)(double),
+    float (*fn_float)(float),
+    double (*fn_double)(double),
     KernelRuntimeContext& ctx,
     const Tensor& in,
     Tensor& out) {
-  (void)ctx;
-
   // Resize for dynamic shape
   ET_KERNEL_CHECK_MSG(
       ctx,
@@ -38,7 +37,17 @@ Tensor& unary_ufunc_realhbf16(
 
   ET_SWITCH_REALHBF16_TYPES(in.scalar_type(), ctx, __func__, CTYPE, [&] {
     apply_unary_map_fn(
-        [fn](const CTYPE val_in) { return static_cast<CTYPE>(fn(val_in)); },
+        [fn_double, fn_float](const CTYPE val_in) {
+          if constexpr (std::is_same_v<CTYPE, double>) {
+            (void)fn_float;
+            double xi = static_cast<double>(val_in);
+            return fn_double(xi);
+          } else {
+            (void)fn_double;
+            float xi = static_cast<float>(val_in);
+            return static_cast<CTYPE>(fn_float(xi));
+          }
+        },
         in.const_data_ptr<CTYPE>(),
         out.mutable_data_ptr<CTYPE>(),
         in.numel());