diff --git a/kernels/portable/cpu/op_amax.cpp b/kernels/portable/cpu/op_amax.cpp
index 4ad409d4820..e055f5b9392 100644
--- a/kernels/portable/cpu/op_amax.cpp
+++ b/kernels/portable/cpu/op_amax.cpp
@@ -9,6 +9,7 @@
 #include
 #include
+#include <executorch/kernels/portable/cpu/util/math_util.h>
 #include
 #include
 #include
@@ -51,7 +52,7 @@ Tensor& amax_out(
     for (const auto out_ix : c10::irange(begin, end)) {
       out_data[out_ix] = plan.execute<CTYPE>(
           [](CTYPE v, CTYPE max_v) {
-            return std::isnan(v) || v > max_v ? v : max_v;
+            return utils::isnan_override(v) || v > max_v ? v : max_v;
           },
           out_ix);
     }
diff --git a/kernels/portable/cpu/op_amin.cpp b/kernels/portable/cpu/op_amin.cpp
index 396cb6c016d..cc348dfefaa 100644
--- a/kernels/portable/cpu/op_amin.cpp
+++ b/kernels/portable/cpu/op_amin.cpp
@@ -8,6 +8,7 @@
 #include
 #include
+#include <executorch/kernels/portable/cpu/util/math_util.h>
 #include
 #include
 #include
@@ -50,7 +51,7 @@ Tensor& amin_out(
     for (const auto out_ix : c10::irange(begin, end)) {
       out_data[out_ix] = plan.execute<CTYPE>(
           [](CTYPE v, CTYPE min_v) {
-            return std::isnan(v) || v < min_v ? v : min_v;
+            return utils::isnan_override(v) || v < min_v ? v : min_v;
           },
           out_ix);
     }
diff --git a/kernels/portable/cpu/op_argmax.cpp b/kernels/portable/cpu/op_argmax.cpp
index 72881453d39..c7ee30cc0e8 100644
--- a/kernels/portable/cpu/op_argmax.cpp
+++ b/kernels/portable/cpu/op_argmax.cpp
@@ -10,6 +10,7 @@
 #include
 #include
+#include <executorch/kernels/portable/cpu/util/math_util.h>
 #include
 #include
 #include
@@ -55,7 +56,7 @@ Tensor& argmax_out(
         // the below condition as written is equivalent to
         // !isnan(accval) && (isnan(v) || v > acc_val). See
         // argument in op_argmin.cpp.
-        if (!std::isnan(acc_val) && !(v <= acc_val)) {
+        if (!utils::isnan_override(acc_val) && !(v <= acc_val)) {
           acc_val = v;
           acc_ix = ix;
         }
diff --git a/kernels/portable/cpu/op_argmin.cpp b/kernels/portable/cpu/op_argmin.cpp
index 4e661c68694..d924cfeee39 100644
--- a/kernels/portable/cpu/op_argmin.cpp
+++ b/kernels/portable/cpu/op_argmin.cpp
@@ -10,6 +10,7 @@
 #include
 #include
+#include <executorch/kernels/portable/cpu/util/math_util.h>
 #include
 #include
 #include
@@ -62,7 +63,7 @@ Tensor& argmin_out(
         // - false, so the result is true. The result is trivially
         // - true for the above condition that uses isnan(v) as
         // - well.
-        if (!std::isnan(acc_val) && !(v >= acc_val)) {
+        if (!utils::isnan_override(acc_val) && !(v >= acc_val)) {
          acc_val = v;
          acc_ix = ix;
        }
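Note (illustration, not part of the patch): the argmax/argmin comments rely on the fact that, for a non-NaN acc_val, !(v <= acc_val) is equivalent to isnan(v) || v > acc_val, because every comparison against NaN is false. A minimal standalone sketch checking that equivalence; the test harness and names are illustrative only:

```cpp
#include <cassert>
#include <cmath>
#include <initializer_list>
#include <limits>

int main() {
  const float nan = std::numeric_limits<float>::quiet_NaN();
  const float vals[] = {-1.0f, 0.0f, 2.5f, nan};
  for (float acc : {-1.0f, 0.0f, 2.5f}) {  // acc_val is assumed non-NaN
    for (float v : vals) {
      const bool compact = !(v <= acc);                // form used by the kernels
      const bool expanded = std::isnan(v) || v > acc;  // form in the comments
      assert(compact == expanded);
    }
  }
  return 0;
}
```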
diff --git a/kernels/portable/cpu/op_max.cpp b/kernels/portable/cpu/op_max.cpp
index 3f4a1d27c0e..cdea0834806 100644
--- a/kernels/portable/cpu/op_max.cpp
+++ b/kernels/portable/cpu/op_max.cpp
@@ -10,6 +10,7 @@
 #include
 #include
+#include <executorch/kernels/portable/cpu/util/math_util.h>
 #include
 #include
 #include
@@ -88,8 +89,8 @@ std::tuple<Tensor&, Tensor&> max_out(
     for (const auto out_ix : c10::irange(begin, end)) {
       std::tuple<CTYPE, long> acc = reduce_over_dim<CTYPE>(
           [](CTYPE v, long ix, CTYPE acc_val, long acc_ix) {
-            if (!std::isnan(acc_val) &&
-                (std::isnan(v) || v > acc_val)) {
+            if (!utils::isnan_override(acc_val) &&
+                (utils::isnan_override(v) || v > acc_val)) {
               acc_val = v;
               acc_ix = ix;
             }
@@ -132,7 +133,7 @@ max_unary_out(KernelRuntimeContext& ctx, const Tensor& in, Tensor& out) {
       data_out[0] = lower_bound<CTYPE_OUT>();
       for (const auto i : c10::irange(in.numel())) {
         CTYPE_OUT val = static_cast<CTYPE_OUT>(data_in[i]);
-        if (std::isnan(val)) {
+        if (utils::isnan_override(val)) {
           data_out[0] = val;
           break;
         }
diff --git a/kernels/portable/cpu/op_min.cpp b/kernels/portable/cpu/op_min.cpp
index 8b70bcd40f5..d4d59d04128 100644
--- a/kernels/portable/cpu/op_min.cpp
+++ b/kernels/portable/cpu/op_min.cpp
@@ -10,6 +10,7 @@
 #include
 #include
+#include <executorch/kernels/portable/cpu/util/math_util.h>
 #include
 #include
 #include
@@ -88,8 +89,8 @@ std::tuple<Tensor&, Tensor&> min_out(
     for (const auto out_ix : c10::irange(begin, end)) {
       std::tuple<CTYPE, long> acc = reduce_over_dim<CTYPE>(
           [](CTYPE v, long ix, CTYPE acc_val, long acc_ix) {
-            if (!std::isnan(acc_val) &&
-                (std::isnan(v) || v < acc_val)) {
+            if (!utils::isnan_override(acc_val) &&
+                (utils::isnan_override(v) || v < acc_val)) {
               acc_val = v;
               acc_ix = ix;
             }
@@ -132,7 +133,7 @@ min_unary_out(KernelRuntimeContext& ctx, const Tensor& in, Tensor& out) {
       data_out[0] = upper_bound<CTYPE_OUT>();
       for (const auto i : c10::irange(in.numel())) {
         CTYPE_OUT val = static_cast<CTYPE_OUT>(data_in[i]);
-        if (std::isnan(val)) {
+        if (utils::isnan_override(val)) {
           data_out[0] = val;
           break;
         }
diff --git a/kernels/portable/cpu/op_relu.cpp b/kernels/portable/cpu/op_relu.cpp
index 973542a2a77..4b848fa17e4 100644
--- a/kernels/portable/cpu/op_relu.cpp
+++ b/kernels/portable/cpu/op_relu.cpp
@@ -9,6 +9,7 @@
 #include
 #include
+#include <executorch/kernels/portable/cpu/util/math_util.h>
 #include
 #include
@@ -45,7 +46,9 @@ Tensor& relu_out(KernelRuntimeContext& ctx, const Tensor& in, Tensor& out) {
   ET_SWITCH_REALHBF16_TYPES(in.scalar_type(), ctx, "relu.out", CTYPE, [&]() {
     apply_unary_map_fn(
         [](const CTYPE val_in) {
-          return (std::isnan(val_in) || val_in >= CTYPE(0)) ? val_in : CTYPE(0);
+          return (utils::isnan_override(val_in) || val_in >= CTYPE(0))
+              ? val_in
+              : CTYPE(0);
         },
         in.const_data_ptr<CTYPE>(),
         out.mutable_data_ptr<CTYPE>(),
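The max/min kernels propagate NaN the way PyTorch does: once a NaN is seen, it wins the reduction, because any comparison against NaN is false. A minimal sketch of that policy outside the kernel plumbing; the helper name and harness are illustrative, not ExecuTorch API:

```cpp
#include <cmath>
#include <cstdio>
#include <vector>

// Illustrative helper mirroring the amax/max lambdas above: a NaN v replaces
// max_v, and a NaN max_v can never be replaced afterwards, since every
// comparison against NaN evaluates to false.
template <typename T>
T nan_propagating_max(const std::vector<T>& xs, T init) {
  T max_v = init;
  for (T v : xs) {
    max_v = (std::isnan(v) || v > max_v) ? v : max_v;
  }
  return max_v;
}

int main() {
  std::vector<float> a = {1.0f, 3.0f, 2.0f};
  std::vector<float> b = {1.0f, NAN, 3.0f};
  std::printf("%f\n", nan_propagating_max<float>(a, -INFINITY));  // 3.000000
  std::printf("%f\n", nan_propagating_max<float>(b, -INFINITY));  // nan
  return 0;
}
```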
diff --git a/kernels/portable/cpu/op_sign.cpp b/kernels/portable/cpu/op_sign.cpp
index e6945094973..56d07133539 100644
--- a/kernels/portable/cpu/op_sign.cpp
+++ b/kernels/portable/cpu/op_sign.cpp
@@ -10,6 +10,7 @@
 #include
 #include
+#include <executorch/kernels/portable/cpu/util/math_util.h>
 #include
 #include
@@ -42,7 +43,7 @@ Tensor& sign_out(KernelRuntimeContext& ctx, const Tensor& in, Tensor& out) {
   ET_SWITCH_REALHBF16_TYPES(in.scalar_type(), ctx, "sign.out", CTYPE, [&] {
     apply_unary_map_fn(
         [](const CTYPE val_in) {
-          if (std::isnan(val_in)) {
+          if (utils::isnan_override(val_in)) {
             return val_in;
           } else {
             return static_cast<CTYPE>((val_in > 0) - (val_in < 0));
diff --git a/kernels/portable/cpu/op_topk.cpp b/kernels/portable/cpu/op_topk.cpp
index e35e67193bf..e2143ce78d5 100644
--- a/kernels/portable/cpu/op_topk.cpp
+++ b/kernels/portable/cpu/op_topk.cpp
@@ -10,6 +10,8 @@
 #include
 #include
+#include <executorch/kernels/portable/cpu/util/math_util.h>
+#include
 #include
 
 namespace torch {
@@ -62,7 +64,7 @@ bool float_less_than(T x, T y) {
   if constexpr (std::is_integral_v<T>) {
     return x < y;
   }
-  return (!std::isnan(x) && std::isnan(y)) || x < y;
+  return (!utils::isnan_override(x) && utils::isnan_override(y)) || x < y;
 }
 
 template >
diff --git a/kernels/portable/cpu/util/math_util.h b/kernels/portable/cpu/util/math_util.h
index 2c4828b9e6e..a3a64997a5f 100644
--- a/kernels/portable/cpu/util/math_util.h
+++ b/kernels/portable/cpu/util/math_util.h
@@ -8,10 +8,14 @@
 
 #pragma once
 
+#include <cmath>
+
 #if defined(ET_USE_PYTORCH_HEADERS) && ET_USE_PYTORCH_HEADERS
 #include
 #endif
 
+#include <type_traits>
+
 namespace torch {
 namespace executor {
 namespace native {
@@ -29,7 +33,8 @@ template <
     typename std::enable_if<std::is_integral<INT_T>::value, bool>::type = true>
 INT_T floor_divide(INT_T a, INT_T b) {
   const auto quot = a / b;
-  if (std::signbit(a) == std::signbit(b)) {
+  // MSVC does not like signbit on integral types.
+  if ((a < 0) == (b < 0)) {
     return quot;
   }
   const auto rem = a % b;
@@ -52,6 +57,20 @@ FLOAT_T floor_divide(FLOAT_T a, FLOAT_T b) {
   return div;
 }
 
+/**
+ * A wrapper around std::isnan that works with MSVC. When building with MSVC,
+ * std::isnan calls with integer inputs fail to compile due to ambiguous
+ * overload resolution.
+ */
+template <typename T>
+bool isnan_override(T a) {
+  if constexpr (!std::is_integral_v<T>) {
+    return std::isnan(a);
+  } else {
+    return false;
+  }
+}
+
 /**
  * Override min/max so we can emulate PyTorch's behavior with NaN entries.
 */
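The if-constexpr dispatch above is the crux of the MSVC fix: for integral instantiations the std::isnan branch is discarded at compile time, so the call that MSVC rejects as ambiguous is never even named. A self-contained sketch reproducing the same shape as the new isnan_override, with an illustrative usage check (the harness is an assumption, not ExecuTorch code):

```cpp
#include <cassert>
#include <cmath>
#include <type_traits>

// Same shape as the isnan_override added to math_util.h: integral T takes
// the constant-false branch, so std::isnan is never instantiated with an
// integer argument (the case MSVC fails to resolve).
template <typename T>
bool isnan_override(T a) {
  if constexpr (!std::is_integral_v<T>) {
    return std::isnan(a);
  } else {
    return false;  // integers can never be NaN
  }
}

int main() {
  assert(!isnan_override(42));           // int: compiles on MSVC, always false
  assert(!isnan_override(1.5));          // double: defers to std::isnan
  assert(isnan_override(std::nan("")));  // NaN double is detected
  return 0;
}
```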
diff --git a/shim_et/xplat/executorch/kernels/portable/op_registration_util.bzl b/shim_et/xplat/executorch/kernels/portable/op_registration_util.bzl
index 73dfafdc65d..4c63dd3420c 100644
--- a/shim_et/xplat/executorch/kernels/portable/op_registration_util.bzl
+++ b/shim_et/xplat/executorch/kernels/portable/op_registration_util.bzl
@@ -246,6 +246,7 @@ ATEN_OPS = (
         deps = [
             "//executorch/runtime/core/exec_aten/util:scalar_type_util",
             "//executorch/runtime/core/exec_aten/util:tensor_util",
+            "//executorch/kernels/portable/cpu/util:math_util",
             "//executorch/kernels/portable/cpu/util:reduce_util",
         ],
     ),
@@ -255,6 +256,7 @@ ATEN_OPS = (
         deps = [
             "//executorch/runtime/core/exec_aten/util:scalar_type_util",
             "//executorch/runtime/core/exec_aten/util:tensor_util",
             "//executorch/kernels/portable/cpu/util:index_util",
+            "//executorch/kernels/portable/cpu/util:math_util",
             "//executorch/kernels/portable/cpu/util:reduce_util",
         ],
     ),
@@ -278,12 +280,14 @@ ATEN_OPS = (
     op_target(
         name = "op_argmax",
         deps = [
+            "//executorch/kernels/portable/cpu/util:math_util",
            "//executorch/kernels/portable/cpu/util:reduce_util",
         ],
     ),
     op_target(
         name = "op_argmin",
         deps = [
+            "//executorch/kernels/portable/cpu/util:math_util",
             "//executorch/kernels/portable/cpu/util:reduce_util",
         ],
     ),
@@ -806,6 +810,7 @@ ATEN_OPS = (
     op_target(
         name = "op_max",
         deps = [
+            "//executorch/kernels/portable/cpu/util:math_util",
             "//executorch/kernels/portable/cpu/util:reduce_util",
         ],
     ),
@@ -843,6 +848,7 @@ ATEN_OPS = (
     op_target(
         name = "op_min",
         deps = [
+            "//executorch/kernels/portable/cpu/util:math_util",
             "//executorch/kernels/portable/cpu/util:reduce_util",
         ],
     ),
@@ -1019,6 +1025,7 @@ ATEN_OPS = (
         name = "op_relu",
         deps = [
             "//executorch/kernels/portable/cpu/util:functional_util",
+            "//executorch/kernels/portable/cpu/util:math_util",
         ],
     ),
     op_target(
@@ -1129,6 +1136,7 @@ ATEN_OPS = (
         name = "op_sign",
         deps = [
             "//executorch/kernels/portable/cpu/util:functional_util",
+            "//executorch/kernels/portable/cpu/util:math_util",
         ],
     ),
     op_target(
@@ -1236,6 +1244,9 @@ ATEN_OPS = (
     ),
     op_target(
         name = "op_topk",
+        deps = [
+            "//executorch/kernels/portable/cpu/util:math_util",
+        ]
     ),
     op_target(
         name = "op_transpose_copy",
diff --git a/tools/cmake/preset/windows.cmake b/tools/cmake/preset/windows.cmake
index 52397e73186..72118b9177a 100644
--- a/tools/cmake/preset/windows.cmake
+++ b/tools/cmake/preset/windows.cmake
@@ -6,6 +6,7 @@
 
 # keep sorted
+set_overridable_option(EXECUTORCH_BUILD_EXECUTOR_RUNNER ON)
 set_overridable_option(EXECUTORCH_BUILD_EXTENSION_DATA_LOADER ON)
 set_overridable_option(EXECUTORCH_BUILD_EXTENSION_EVALUE_UTIL ON)
 set_overridable_option(EXECUTORCH_BUILD_EXTENSION_FLAT_TENSOR ON)
@@ -21,7 +22,5 @@ set_overridable_option(XNNPACK_ENABLE_AVX256VNNIGFNI OFF)
 set_overridable_option(XNNPACK_ENABLE_AVX512BF16 OFF)
 
 # Below options are not yet buildable on Windows, but should be.
-set(EXECUTORCH_BUILD_PORTABLE_OPS OFF CACHE BOOL "")
-#set_overridable_option(EXECUTORCH_BUILD_EXECUTOR_RUNNER ON)
 #set_overridable_option(EXECUTORCH_BUILD_KERNELS_OPTIMIZED ON)
 #set_overridable_option(EXECUTORCH_BUILD_KERNELS_QUANTIZED ON)