[Executorch][Portable] Don't upcast to double for sigmoid

kirklandsign · kimishpatel · web-flow · commit c242a59e5860 · 2024-11-18T10:07:46.000-08:00
Pull Request resolved: #6892 Upcasting to double for compute precision may not be ATen compliant. Reason for internal test change: Apparently, running on a Broadwell CPU vs. a test runner with Cooper Lake gives different results for this change. Without this change: both Broadwell and Cooper Lake produce "Once upon a time, there was a little". With this change: Broadwell still produces "Once upon a time, there was a little", while Cooper Lake produces "Once upon a time, there was a girl". So one possibility is that some XNNPACK kernel for Cooper Lake produces a slightly different numerical result that propagates through. Still landing this change, since upcasting to double for compute does not seem necessary. ghstack-source-id: 253832495 @exported-using-ghexport Differential Revision: [D65928920](https://our.internmc.facebook.com/intern/diff/D65928920/) Co-authored-by: Kimish Patel <kimishpatel@fb.com>
diff --git a/kernels/portable/cpu/op_sigmoid.cpp b/kernels/portable/cpu/op_sigmoid.cpp
@@ -8,6 +8,7 @@
 
 #include <cmath>
 
+#include <executorch/kernels/portable/cpu/util/elementwise_util.h>
 #include <executorch/kernels/portable/cpu/util/functional_util.h>
 #include <executorch/runtime/kernel/kernel_includes.h>
 
@@ -35,21 +36,26 @@ Tensor& sigmoid_out(KernelRuntimeContext& ctx, const Tensor& in, Tensor& out) {
       out,
       "Failed to resize output tensor.");
 
-  ScalarType in_type = in.scalar_type();
-  ScalarType out_type = out.scalar_type();
-  ET_SWITCH_REALHB_TYPES(in_type, ctx, "sigmoid.out", CTYPE_IN, [&]() {
-    ET_SWITCH_FLOATH_TYPES(out_type, ctx, "sigmoid.out", CTYPE_OUT, [&]() {
-      apply_unary_map_fn(
-          [](const CTYPE_IN val_in) {
-            // perform math in double to preserve precision
-            double in_casted = static_cast<double>(val_in);
-            double out_val = 1.0 / (1.0 + exp(-in_casted));
-            return static_cast<CTYPE_OUT>(out_val);
-          },
-          in.const_data_ptr<CTYPE_IN>(),
-          out.mutable_data_ptr<CTYPE_OUT>(),
-          in.numel());
-    });
+  ScalarType compute_type =
+      executorch::runtime::isFloatingType(in.scalar_type()) ? in.scalar_type()
+                                                            : ScalarType::Float;
+  compute_type = utils::get_compute_type(compute_type);
+
+  // @lint-ignore CLANGTIDY facebook-hte-CArray
+  static constexpr const char op_name[] = "sigmoid.out";
+
+  ET_SWITCH_FLOAT_TYPES(compute_type, ctx, op_name, CTYPE_COMPUTE, [&]() {
+    utils::apply_unitensor_elementwise_fn<CTYPE_COMPUTE, op_name>(
+        [](const CTYPE_COMPUTE val_in) {
+          CTYPE_COMPUTE out_val = static_cast<CTYPE_COMPUTE>(1.0) /
+              (static_cast<CTYPE_COMPUTE>(1.0) + exp(-val_in));
+          return out_val;
+        },
+        ctx,
+        in,
+        utils::SupportedTensorDtypes::REALHBBF16,
+        out,
+        utils::SupportedTensorDtypes::FLOATHBF16);
   });
 
   return out;
diff --git a/shim/xplat/executorch/kernels/portable/op_registration_util.bzl b/shim/xplat/executorch/kernels/portable/op_registration_util.bzl
@@ -1080,6 +1080,9 @@ ATEN_OPS = (
         name = "op_sigmoid",
         deps = [
             "//executorch/kernels/portable/cpu/util:functional_util",
+            "//executorch/kernels/portable/cpu/util:elementwise_util",
+            "//executorch/kernels/portable/cpu/util:broadcast_util",
+            "//executorch/kernels/portable/cpu/util:dtype_util",
         ],
     ),
     op_target(