From 2bacc8fdca78330ac340805501360d33c23e6d7e Mon Sep 17 00:00:00 2001
From: Ivaylo Enchev
Date: Mon, 25 Aug 2025 11:13:51 -0700
Subject: [PATCH] ET use compiler macros in ops

Use the new compiler macros to fix operator library builds.

Additionally, std::isnan and std::signbit only provide overloads for
float, double, and long double; apply a static_cast so every input type
resolves to a supported overload, avoiding an MSVC compiler error.

Differential Revision: [D78854136](https://our.internmc.facebook.com/intern/diff/D78854136/)

[ghstack-poisoned]
---
 kernels/portable/cpu/op_add.cpp              |  4 +-
 kernels/portable/cpu/op_addmm.cpp            |  2 +-
 kernels/portable/cpu/op_amax.cpp             |  2 +-
 kernels/portable/cpu/op_amin.cpp             |  2 +-
 kernels/portable/cpu/op_argmax.cpp           |  2 +-
 kernels/portable/cpu/op_argmin.cpp           |  2 +-
 kernels/portable/cpu/op_atan2.cpp            |  2 +-
 kernels/portable/cpu/op_bitwise_and.cpp      |  4 +-
 kernels/portable/cpu/op_bitwise_or.cpp       |  4 +-
 kernels/portable/cpu/op_bitwise_xor.cpp      |  4 +-
 kernels/portable/cpu/op_clamp.cpp            |  4 +-
 kernels/portable/cpu/op_convolution.cpp      |  2 +-
 kernels/portable/cpu/op_copy.cpp             |  4 +-
 kernels/portable/cpu/op_cumsum.cpp           |  2 +-
 kernels/portable/cpu/op_div.cpp              |  8 +--
 kernels/portable/cpu/op_elu.cpp              |  2 +-
 kernels/portable/cpu/op_eq.cpp               |  4 +-
 kernels/portable/cpu/op_floor_divide.cpp     |  2 +-
 kernels/portable/cpu/op_fmod.cpp             |  4 +-
 kernels/portable/cpu/op_ge.cpp               |  4 +-
 kernels/portable/cpu/op_glu.cpp              |  2 +-
 kernels/portable/cpu/op_gt.cpp               |  4 +-
 kernels/portable/cpu/op_le.cpp               |  4 +-
 kernels/portable/cpu/op_logical_and.cpp      |  2 +-
 kernels/portable/cpu/op_logical_or.cpp       |  2 +-
 kernels/portable/cpu/op_logical_xor.cpp      |  2 +-
 kernels/portable/cpu/op_lt.cpp               |  4 +-
 kernels/portable/cpu/op_max.cpp              |  6 +--
 kernels/portable/cpu/op_maximum.cpp          |  2 +-
 kernels/portable/cpu/op_mean.cpp             |  2 +-
 kernels/portable/cpu/op_min.cpp              |  6 +--
 kernels/portable/cpu/op_minimum.cpp          |  2 +-
 kernels/portable/cpu/op_mul.cpp              |  4 +-
 kernels/portable/cpu/op_native_dropout.cpp   |  3 +-
 kernels/portable/cpu/op_ne.cpp               |  4 +-
 kernels/portable/cpu/op_neg.cpp              |  2 +-
 kernels/portable/cpu/op_pow.cpp              |  6 +--
 kernels/portable/cpu/op_relu.cpp             |  2 +-
 kernels/portable/cpu/op_remainder.cpp        |  4 +-
 kernels/portable/cpu/op_rsub.cpp             |  2 +-
 kernels/portable/cpu/op_sigmoid.cpp          |  2 +-
 kernels/portable/cpu/op_sign.cpp             |  2 +-
 kernels/portable/cpu/op_sub.cpp              |  4 +-
 kernels/portable/cpu/op_sum.cpp              |  2 +-
 kernels/portable/cpu/op_topk.cpp             |  2 +-
 kernels/portable/cpu/op_tril.cpp             |  8 +--
 kernels/portable/cpu/op_where.cpp            |  2 +-
 kernels/portable/cpu/targets.bzl             |  1 +
 kernels/portable/cpu/util/elementwise_util.h |  2 +-
 kernels/portable/cpu/util/math_util.h        |  2 +-
 kernels/portable/cpu/vec_ops.h               | 51 ++++++++++----------
 51 files changed, 105 insertions(+), 102 deletions(-)

diff --git a/kernels/portable/cpu/op_add.cpp b/kernels/portable/cpu/op_add.cpp
index 7dead2bf5a7..544d054e7bd 100644
--- a/kernels/portable/cpu/op_add.cpp
+++ b/kernels/portable/cpu/op_add.cpp
@@ -49,7 +49,7 @@ Tensor& add_out(
   ScalarType compute_type = utils::get_compute_type(common_type);
 
   // @lint-ignore CLANGTIDY facebook-hte-CArray
-  static constexpr const char op_name[] = "add.out";
+  static ET_OP_NAME_SPECIFIER const char op_name[] = "add.out";
 
   if (executorch::runtime::isComplexType(a.scalar_type()) ||
       executorch::runtime::isComplexType(b.scalar_type()) ||
@@ -125,7 +125,7 @@ Tensor& add_scalar_out(
   ScalarType compute_type = utils::get_compute_type(common_type);
 
   // @lint-ignore CLANGTIDY facebook-hte-CArray
-  static constexpr const char op_name[] = "add.Scalar_out";
+  static ET_OP_NAME_SPECIFIER const char op_name[] = "add.Scalar_out";
 
   ET_SWITCH_REALB_TYPES(compute_type, ctx, op_name, CTYPE_COMPUTE, [&]() {
     CTYPE_COMPUTE val_b = utils::scalar_to<CTYPE_COMPUTE>(b);
diff --git a/kernels/portable/cpu/op_addmm.cpp b/kernels/portable/cpu/op_addmm.cpp
index 440a8b2c0fa..e1a5d0885eb 100644
--- a/kernels/portable/cpu/op_addmm.cpp
+++ b/kernels/portable/cpu/op_addmm.cpp
@@ -54,7 +54,7 @@ Tensor& addmm_out(
   ET_KERNEL_CHECK(ctx, tensor_is_default_dim_order(in), InvalidArgument, out);
 
   // @lint-ignore CLANGTIDY facebook-hte-CArray
-  static constexpr const char op_name[] = "addmm.out";
+  static ET_OP_NAME_SPECIFIER const char op_name[] = "addmm.out";
 
   ET_SWITCH_REALHBF16_TYPES(in.scalar_type(), ctx, op_name, CTYPE, [&]() {
     CTYPE alpha_val = utils::scalar_to<CTYPE>(alpha);
diff --git a/kernels/portable/cpu/op_amax.cpp b/kernels/portable/cpu/op_amax.cpp
index 192fad5c908..323ff3df67e 100644
--- a/kernels/portable/cpu/op_amax.cpp
+++ b/kernels/portable/cpu/op_amax.cpp
@@ -55,7 +55,7 @@ Tensor& amax_out(
     for (const auto out_ix : c10::irange(begin, end)) {
       out_data[out_ix] = plan.execute<CTYPE>(
           [](CTYPE v, CTYPE max_v) {
-            return std::isnan(v) || v > max_v ? v : max_v;
+            return std::isnan(static_cast<double>(v)) || v > max_v ? v : max_v;
           },
           out_ix);
     }
diff --git a/kernels/portable/cpu/op_amin.cpp b/kernels/portable/cpu/op_amin.cpp
index d4e9be4f4e0..5611e872e29 100644
--- a/kernels/portable/cpu/op_amin.cpp
+++ b/kernels/portable/cpu/op_amin.cpp
@@ -54,7 +54,7 @@ Tensor& amin_out(
     for (const auto out_ix : c10::irange(begin, end)) {
       out_data[out_ix] = plan.execute<CTYPE>(
           [](CTYPE v, CTYPE min_v) {
-            return std::isnan(v) || v < min_v ? v : min_v;
+            return std::isnan(static_cast<double>(v)) || v < min_v ? v : min_v;
           },
           out_ix);
     }
diff --git a/kernels/portable/cpu/op_argmax.cpp b/kernels/portable/cpu/op_argmax.cpp
index 0e62c049082..5da78b5c548 100644
--- a/kernels/portable/cpu/op_argmax.cpp
+++ b/kernels/portable/cpu/op_argmax.cpp
@@ -58,7 +58,7 @@ Tensor& argmax_out(
           // the below condition as written is equivalent to
           // !isnan(accval) && (isnan(v) || v > acc_val). See
           // argument in op_argmin.cpp.
-          if (!std::isnan(acc_val) && !(v <= acc_val)) {
+          if (!std::isnan(static_cast<double>(acc_val)) && !(v <= acc_val)) {
            acc_val = v;
            acc_ix = ix;
          }
diff --git a/kernels/portable/cpu/op_argmin.cpp b/kernels/portable/cpu/op_argmin.cpp
index d422610769f..f0d32e497ec 100644
--- a/kernels/portable/cpu/op_argmin.cpp
+++ b/kernels/portable/cpu/op_argmin.cpp
@@ -65,7 +65,7 @@ Tensor& argmin_out(
           // - false, so the result is true. The result is trivially
           // - true for the above condition that uses isnan(v) as
           // - well.
- if (!std::isnan(acc_val) && !(v >= acc_val)) { + if (!std::isnan(static_cast(acc_val)) && !(v >= acc_val)) { acc_val = v; acc_ix = ix; } diff --git a/kernels/portable/cpu/op_atan2.cpp b/kernels/portable/cpu/op_atan2.cpp index 5390eb52820..52b059442fe 100644 --- a/kernels/portable/cpu/op_atan2.cpp +++ b/kernels/portable/cpu/op_atan2.cpp @@ -52,7 +52,7 @@ Tensor& atan2_out( ScalarType compute_type = utils::get_compute_type(common_type); // @lint-ignore CLANGTIDY facebook-hte-CArray - static constexpr const char op_name[] = "atan2.out"; + static ET_OP_NAME_SPECIFIER const char op_name[] = "atan2.out"; ET_SWITCH_FLOAT_TYPES(compute_type, ctx, op_name, CTYPE_COMPUTE, [&]() { utils::apply_bitensor_elementwise_fn< diff --git a/kernels/portable/cpu/op_bitwise_and.cpp b/kernels/portable/cpu/op_bitwise_and.cpp index 609dcb1e949..ac7d6ececdf 100644 --- a/kernels/portable/cpu/op_bitwise_and.cpp +++ b/kernels/portable/cpu/op_bitwise_and.cpp @@ -20,7 +20,7 @@ Tensor& bitwise_and_Tensor_out( const Tensor& b, Tensor& out) { // @lint-ignore CLANGTIDY facebook-hte-CArray - static constexpr const char op_name[] = "bitwise_and.Tensor_out"; + static ET_OP_NAME_SPECIFIER const char op_name[] = "bitwise_and.Tensor_out"; return internal::bitwise_tensor_out(ctx, a, b, out); } @@ -30,7 +30,7 @@ Tensor& bitwise_and_Scalar_out( const Scalar& b, Tensor& out) { // @lint-ignore CLANGTIDY facebook-hte-CArray - static constexpr const char op_name[] = "bitwise_and.Scalar_out"; + static ET_OP_NAME_SPECIFIER const char op_name[] = "bitwise_and.Scalar_out"; return internal::bitwise_scalar_out(ctx, a, b, out); } diff --git a/kernels/portable/cpu/op_bitwise_or.cpp b/kernels/portable/cpu/op_bitwise_or.cpp index 42cb2a6c3ba..ef4c97d4601 100644 --- a/kernels/portable/cpu/op_bitwise_or.cpp +++ b/kernels/portable/cpu/op_bitwise_or.cpp @@ -20,7 +20,7 @@ Tensor& bitwise_or_Tensor_out( const Tensor& b, Tensor& out) { // @lint-ignore CLANGTIDY facebook-hte-CArray - static constexpr const char op_name[] = "bitwise_or.Tensor_out"; + static ET_OP_NAME_SPECIFIER const char op_name[] = "bitwise_or.Tensor_out"; return internal::bitwise_tensor_out(ctx, a, b, out); } @@ -30,7 +30,7 @@ Tensor& bitwise_or_Scalar_out( const Scalar& b, Tensor& out) { // @lint-ignore CLANGTIDY facebook-hte-CArray - static constexpr const char op_name[] = "bitwise_or.Scalar_out"; + static ET_OP_NAME_SPECIFIER const char op_name[] = "bitwise_or.Scalar_out"; return internal::bitwise_scalar_out(ctx, a, b, out); } diff --git a/kernels/portable/cpu/op_bitwise_xor.cpp b/kernels/portable/cpu/op_bitwise_xor.cpp index 5fe4e1708d5..633d4f6811b 100644 --- a/kernels/portable/cpu/op_bitwise_xor.cpp +++ b/kernels/portable/cpu/op_bitwise_xor.cpp @@ -20,7 +20,7 @@ Tensor& bitwise_xor_Tensor_out( const Tensor& b, Tensor& out) { // @lint-ignore CLANGTIDY facebook-hte-CArray - static constexpr const char op_name[] = "bitwise_xor.Tensor_out"; + static ET_OP_NAME_SPECIFIER const char op_name[] = "bitwise_xor.Tensor_out"; return internal::bitwise_tensor_out(ctx, a, b, out); } @@ -30,7 +30,7 @@ Tensor& bitwise_xor_Scalar_out( const Scalar& b, Tensor& out) { // @lint-ignore CLANGTIDY facebook-hte-CArray - static constexpr const char op_name[] = "bitwise_xor.Scalar_out"; + static ET_OP_NAME_SPECIFIER const char op_name[] = "bitwise_xor.Scalar_out"; return internal::bitwise_scalar_out(ctx, a, b, out); } diff --git a/kernels/portable/cpu/op_clamp.cpp b/kernels/portable/cpu/op_clamp.cpp index b3aa41cda85..6b950f7d2a6 100644 --- a/kernels/portable/cpu/op_clamp.cpp +++ 
+++ b/kernels/portable/cpu/op_clamp.cpp
@@ -132,7 +132,7 @@ Tensor& clamp_out(
   ScalarType compute_type = utils::get_compute_type(common_type);
 
   // @lint-ignore CLANGTIDY facebook-hte-CArray
-  static constexpr const char op_name[] = "clamp.out";
+  static ET_OP_NAME_SPECIFIER const char op_name[] = "clamp.out";
 
   ET_SWITCH_REALB_TYPES(compute_type, ctx, op_name, CTYPE_COMPUTE, [&]() {
     utils::apply_unitensor_elementwise_fn<
@@ -210,7 +210,7 @@ Tensor& clamp_tensor_out(
   ScalarType compute_type = utils::get_compute_type(common_type);
 
   // @lint-ignore CLANGTIDY facebook-hte-CArray
-  static constexpr const char op_name[] = "clamp.Tensor_out";
+  static ET_OP_NAME_SPECIFIER const char op_name[] = "clamp.Tensor_out";
 
   ET_SWITCH_REALB_TYPES(compute_type, ctx, op_name, CTYPE_COMPUTE, [&]() {
     utils::apply_tritensor_elementwise_fn<
diff --git a/kernels/portable/cpu/op_convolution.cpp b/kernels/portable/cpu/op_convolution.cpp
index f598ac99444..ebbee84a800 100644
--- a/kernels/portable/cpu/op_convolution.cpp
+++ b/kernels/portable/cpu/op_convolution.cpp
@@ -410,7 +410,7 @@ Tensor& convolution_out(
   }
 
   // @lint-ignore CLANGTIDY facebook-hte-CArray
-  static constexpr const char name[] = "convolution.out";
+  static ET_OP_NAME_SPECIFIER const char name[] = "convolution.out";
 
   ET_SWITCH_REALH_TYPES(in.scalar_type(), ctx, name, CTYPE, [&]() {
     const auto load_bias = bias.has_value()
diff --git a/kernels/portable/cpu/op_copy.cpp b/kernels/portable/cpu/op_copy.cpp
index 968231fc42e..40a8a620c80 100644
--- a/kernels/portable/cpu/op_copy.cpp
+++ b/kernels/portable/cpu/op_copy.cpp
@@ -44,7 +44,7 @@ Tensor& copy_out(
       ctx, tensors_have_same_dim_order(in, out), InvalidArgument, out);
 
   // @lint-ignore CLANGTIDY facebook-hte-CArray
-  static constexpr const char op_name[] = "copy.out";
+  static ET_OP_NAME_SPECIFIER const char op_name[] = "copy.out";
 
   // Use direct copy fast path if broadcast is not needed and tensors are
   // non-empty
@@ -86,7 +86,7 @@ Tensor& copy_(
       ctx, tensors_have_same_dim_order(in, src), InvalidArgument, in);
 
   // @lint-ignore CLANGTIDY facebook-hte-CArray
-  static constexpr const char op_name[] = "copy_";
+  static ET_OP_NAME_SPECIFIER const char op_name[] = "copy_";
 
   // Use direct copy fast path if broadcast is not needed and tensors are
   // non-empty
diff --git a/kernels/portable/cpu/op_cumsum.cpp b/kernels/portable/cpu/op_cumsum.cpp
index 3a518d30715..ec0138fe985 100644
--- a/kernels/portable/cpu/op_cumsum.cpp
+++ b/kernels/portable/cpu/op_cumsum.cpp
@@ -109,7 +109,7 @@ Tensor& cumsum_out(
   dim = (self.dim() == 0) ? 0 : dim < 0 ? dim + self.dim() : dim;
 
   // @lint-ignore CLANGTIDY facebook-hte-CArray
-  static constexpr const char op_name[] = "cumsum.out";
+  static ET_OP_NAME_SPECIFIER const char op_name[] = "cumsum.out";
 
   ET_SWITCH_REALHBBF16_TYPES(out.scalar_type(), ctx, op_name, CTYPE_OUT, [&]() {
     const auto load_self =
diff --git a/kernels/portable/cpu/op_div.cpp b/kernels/portable/cpu/op_div.cpp
index 51a65747b33..287504fa910 100644
--- a/kernels/portable/cpu/op_div.cpp
+++ b/kernels/portable/cpu/op_div.cpp
@@ -55,7 +55,7 @@ Tensor& div_out(
   ScalarType compute_type = utils::get_compute_type(common_type);
 
   // @lint-ignore CLANGTIDY facebook-hte-CArray
-  static constexpr const char op_name[] = "div.out";
+  static ET_OP_NAME_SPECIFIER const char op_name[] = "div.out";
 
   ET_SWITCH_FLOAT_TYPES(compute_type, ctx, op_name, CTYPE_COMPUTE, [&]() {
     utils::apply_bitensor_elementwise_fn<
@@ -116,7 +116,7 @@ Tensor& div_out_mode(
   ScalarType compute_type = utils::get_compute_type(common_type);
 
   // @lint-ignore CLANGTIDY facebook-hte-CArray
-  static constexpr const char op_name[] = "div.out_mode";
+  static ET_OP_NAME_SPECIFIER const char op_name[] = "div.out_mode";
 
   const bool mode_is_trunc = mode_val == "trunc";
   bool div_by_zero_error = false;
@@ -187,7 +187,7 @@ Tensor& div_scalar_out(
   ScalarType compute_type = utils::get_compute_type(common_type);
 
   // @lint-ignore CLANGTIDY facebook-hte-CArray
-  static constexpr const char op_name[] = "div.Scalar_out";
+  static ET_OP_NAME_SPECIFIER const char op_name[] = "div.Scalar_out";
 
   ET_SWITCH_FLOAT_TYPES(compute_type, ctx, op_name, CTYPE_COMPUTE, [&]() {
     const CTYPE_COMPUTE val_b = utils::scalar_to<CTYPE_COMPUTE>(b);
@@ -255,7 +255,7 @@ Tensor& div_scalar_mode_out(
   const bool mode_is_trunc = mode_val == "trunc";
 
   // @lint-ignore CLANGTIDY facebook-hte-CArray
-  static constexpr const char op_name[] = "div.Scalar_mode_out";
+  static ET_OP_NAME_SPECIFIER const char op_name[] = "div.Scalar_mode_out";
 
   ET_SWITCH_REAL_TYPES(compute_type, ctx, op_name, CTYPE_COMPUTE, [&]() {
     const CTYPE_COMPUTE val_b = utils::scalar_to<CTYPE_COMPUTE>(b);
diff --git a/kernels/portable/cpu/op_elu.cpp b/kernels/portable/cpu/op_elu.cpp
index d7477717a3a..729c9d2deee 100644
--- a/kernels/portable/cpu/op_elu.cpp
+++ b/kernels/portable/cpu/op_elu.cpp
@@ -33,7 +33,7 @@ Tensor& elu_out(
 
   ET_KERNEL_CHECK(ctx, tensors_have_same_dtype(in, out), InvalidArgument, out);
 
-  static constexpr const char op_name[] = "elu.out";
+  static ET_OP_NAME_SPECIFIER const char op_name[] = "elu.out";
   ET_SWITCH_FLOATHBF16_TYPES(in.scalar_type(), ctx, op_name, CTYPE, [&]() {
     using MathT = std::
         conditional_t<c10::is_reduced_floating_point_v<CTYPE>, float, CTYPE>;
diff --git a/kernels/portable/cpu/op_eq.cpp b/kernels/portable/cpu/op_eq.cpp
index 9e21b82c43c..8722d3e4235 100644
--- a/kernels/portable/cpu/op_eq.cpp
+++ b/kernels/portable/cpu/op_eq.cpp
@@ -20,7 +20,7 @@ Tensor& eq_tensor_out(
     const Tensor& b,
     Tensor& out) {
   // @lint-ignore CLANGTIDY facebook-hte-CArray
-  static constexpr const char op_name[] = "eq.Tensor_out";
+  static ET_OP_NAME_SPECIFIER const char op_name[] = "eq.Tensor_out";
   return internal::comparison_tensor_out<std::equal_to, op_name>(
       ctx, a, b, out);
 }
@@ -31,7 +31,7 @@ Tensor& eq_scalar_out(
     const Scalar& b,
     Tensor& out) {
   // @lint-ignore CLANGTIDY facebook-hte-CArray
-  static constexpr const char op_name[] = "eq.Scalar_out";
+  static ET_OP_NAME_SPECIFIER const char op_name[] = "eq.Scalar_out";
   return internal::comparison_scalar_out<std::equal_to, op_name>(
       ctx, a, b, out);
 }
diff --git a/kernels/portable/cpu/op_floor_divide.cpp b/kernels/portable/cpu/op_floor_divide.cpp
index 50723c3fa0a..a958f71c75f 100644
--- a/kernels/portable/cpu/op_floor_divide.cpp
+++ b/kernels/portable/cpu/op_floor_divide.cpp
@@ -48,7 +48,7 @@ Tensor& floor_divide_out(
   ScalarType compute_type = utils::get_compute_type(common_type);
 
   // @lint-ignore CLANGTIDY facebook-hte-CArray
-  static constexpr const char op_name[] = "floor_divide.out";
+  static ET_OP_NAME_SPECIFIER const char op_name[] = "floor_divide.out";
 
   bool div_by_zero_error = false;
 
diff --git a/kernels/portable/cpu/op_fmod.cpp b/kernels/portable/cpu/op_fmod.cpp
index 40bb4a5e94c..b1027dcc6dd 100644
--- a/kernels/portable/cpu/op_fmod.cpp
+++ b/kernels/portable/cpu/op_fmod.cpp
@@ -50,7 +50,7 @@ Tensor& fmod_Tensor_out(
   }
 
   // @lint-ignore CLANGTIDY facebook-hte-CArray
-  static constexpr const char op_name[] = "fmod.Tensor_out";
+  static ET_OP_NAME_SPECIFIER const char op_name[] = "fmod.Tensor_out";
 
   bool div_by_zero_error = false;
 
@@ -130,7 +130,7 @@ Tensor& fmod_Scalar_out(
   }
 
   // @lint-ignore CLANGTIDY facebook-hte-CArray
-  static constexpr const char op_name[] = "fmod.Scalar_out";
+  static ET_OP_NAME_SPECIFIER const char op_name[] = "fmod.Scalar_out";
 
   ET_SWITCH_FLOAT_TYPES(compute_type, ctx, op_name, CTYPE_COMPUTE, [&]() {
     const CTYPE_COMPUTE val_b = utils::scalar_to<CTYPE_COMPUTE>(b);
diff --git a/kernels/portable/cpu/op_ge.cpp b/kernels/portable/cpu/op_ge.cpp
index d5e7576b7ae..79af61a70b9 100644
--- a/kernels/portable/cpu/op_ge.cpp
+++ b/kernels/portable/cpu/op_ge.cpp
@@ -20,7 +20,7 @@ Tensor& ge_tensor_out(
     const Tensor& b,
     Tensor& out) {
   // @lint-ignore CLANGTIDY facebook-hte-CArray
-  static constexpr const char op_name[] = "ge.Tensor_out";
+  static ET_OP_NAME_SPECIFIER const char op_name[] = "ge.Tensor_out";
   return internal::comparison_tensor_out<std::greater_equal, op_name>(
       ctx, a, b, out);
 }
@@ -31,7 +31,7 @@ Tensor& ge_scalar_out(
     const Scalar& b,
     Tensor& out) {
   // @lint-ignore CLANGTIDY facebook-hte-CArray
-  static constexpr const char op_name[] = "ge.Scalar_out";
+  static ET_OP_NAME_SPECIFIER const char op_name[] = "ge.Scalar_out";
   return internal::comparison_scalar_out<std::greater_equal, op_name>(
       ctx, a, b, out);
 }
diff --git a/kernels/portable/cpu/op_glu.cpp b/kernels/portable/cpu/op_glu.cpp
index f204b0fd516..5024c590a8a 100644
--- a/kernels/portable/cpu/op_glu.cpp
+++ b/kernels/portable/cpu/op_glu.cpp
@@ -91,7 +91,7 @@ Tensor& glu_out_tensor(
       ? self.scalar_type()
       : ScalarType::Float;
   // @lint-ignore CLANGTIDY facebook-hte-CArray
-  static constexpr const char op_name[] = "glu.out";
+  static ET_OP_NAME_SPECIFIER const char op_name[] = "glu.out";
   ET_SWITCH_FLOATHBF16_TYPES(compute_type, ctx, op_name, CTYPE_COMPUTE, [&]() {
     utils::apply_bitensor_elementwise_fn<
         CTYPE_COMPUTE,
diff --git a/kernels/portable/cpu/op_gt.cpp b/kernels/portable/cpu/op_gt.cpp
index cd65a3b68d9..a148edee414 100644
--- a/kernels/portable/cpu/op_gt.cpp
+++ b/kernels/portable/cpu/op_gt.cpp
@@ -20,7 +20,7 @@ Tensor& gt_tensor_out(
     const Tensor& b,
     Tensor& out) {
   // @lint-ignore CLANGTIDY facebook-hte-CArray
-  static constexpr const char op_name[] = "gt.Tensor_out";
+  static ET_OP_NAME_SPECIFIER const char op_name[] = "gt.Tensor_out";
   return internal::comparison_tensor_out<std::greater, op_name>(ctx, a, b, out);
 }
 
@@ -30,7 +30,7 @@ Tensor& gt_scalar_out(
     const Scalar& b,
     Tensor& out) {
   // @lint-ignore CLANGTIDY facebook-hte-CArray
-  static constexpr const char op_name[] = "gt.Scalar_out";
+  static ET_OP_NAME_SPECIFIER const char op_name[] = "gt.Scalar_out";
   return internal::comparison_scalar_out<std::greater, op_name>(ctx, a, b, out);
 }
 
diff --git a/kernels/portable/cpu/op_le.cpp b/kernels/portable/cpu/op_le.cpp
index 909de1bfad2..f7a57d130df 100644
--- a/kernels/portable/cpu/op_le.cpp
+++ b/kernels/portable/cpu/op_le.cpp
@@ -20,7 +20,7 @@ Tensor& le_tensor_out(
     const Tensor& b,
     Tensor& out) {
   // @lint-ignore CLANGTIDY facebook-hte-CArray
-  static constexpr const char op_name[] = "le.Tensor_out";
+  static ET_OP_NAME_SPECIFIER const char op_name[] = "le.Tensor_out";
   return internal::comparison_tensor_out<std::less_equal, op_name>(
       ctx, a, b, out);
 }
@@ -31,7 +31,7 @@ Tensor& le_scalar_out(
     const Scalar& b,
     Tensor& out) {
   // @lint-ignore CLANGTIDY facebook-hte-CArray
-  static constexpr const char op_name[] = "le.Scalar_out";
+  static ET_OP_NAME_SPECIFIER const char op_name[] = "le.Scalar_out";
   return internal::comparison_scalar_out<std::less_equal, op_name>(
       ctx, a, b, out);
 }
diff --git a/kernels/portable/cpu/op_logical_and.cpp b/kernels/portable/cpu/op_logical_and.cpp
index 361c9a3dbc5..7f8bc24c0cd 100644
--- a/kernels/portable/cpu/op_logical_and.cpp
+++ b/kernels/portable/cpu/op_logical_and.cpp
@@ -27,7 +27,7 @@ Tensor& logical_and_out(
     const Tensor& b,
     Tensor& out) {
   // @lint-ignore CLANGTIDY facebook-hte-CArray
-  static constexpr const char op_name[] = "logical_and.out";
+  static ET_OP_NAME_SPECIFIER const char op_name[] = "logical_and.out";
   return internal::logical_tensor_out<op_name>(logical_and, ctx, a, b, out);
 }
 
diff --git a/kernels/portable/cpu/op_logical_or.cpp b/kernels/portable/cpu/op_logical_or.cpp
index d654104a69e..426924ce9cb 100644
--- a/kernels/portable/cpu/op_logical_or.cpp
+++ b/kernels/portable/cpu/op_logical_or.cpp
@@ -27,7 +27,7 @@ Tensor& logical_or_out(
     const Tensor& b,
     Tensor& out) {
   // @lint-ignore CLANGTIDY facebook-hte-CArray
-  static constexpr const char op_name[] = "logical_or.out";
+  static ET_OP_NAME_SPECIFIER const char op_name[] = "logical_or.out";
   return internal::logical_tensor_out<op_name>(logical_or, ctx, a, b, out);
 }
 
diff --git a/kernels/portable/cpu/op_logical_xor.cpp b/kernels/portable/cpu/op_logical_xor.cpp
index 854dd8e572f..e3bd49c5686 100644
--- a/kernels/portable/cpu/op_logical_xor.cpp
+++ b/kernels/portable/cpu/op_logical_xor.cpp
@@ -27,7 +27,7 @@ Tensor& logical_xor_out(
     const Tensor& b,
     Tensor& out) {
   // @lint-ignore CLANGTIDY facebook-hte-CArray
-  static constexpr const char op_name[] = "logical_xor.out";
+  static ET_OP_NAME_SPECIFIER const char op_name[] = "logical_xor.out";
   return internal::logical_tensor_out<op_name>(logical_xor, ctx, a, b, out);
 }
 
diff --git a/kernels/portable/cpu/op_lt.cpp b/kernels/portable/cpu/op_lt.cpp
index 5af89920536..852f110b900 100644
--- a/kernels/portable/cpu/op_lt.cpp
+++ b/kernels/portable/cpu/op_lt.cpp
@@ -20,7 +20,7 @@ Tensor& lt_tensor_out(
     const Tensor& b,
     Tensor& out) {
   // @lint-ignore CLANGTIDY facebook-hte-CArray
-  static constexpr const char op_name[] = "lt.Tensor_out";
+  static ET_OP_NAME_SPECIFIER const char op_name[] = "lt.Tensor_out";
   return internal::comparison_tensor_out<std::less, op_name>(ctx, a, b, out);
 }
 
@@ -30,7 +30,7 @@ Tensor& lt_scalar_out(
     const Scalar& b,
     Tensor& out) {
   // @lint-ignore CLANGTIDY facebook-hte-CArray
-  static constexpr const char op_name[] = "lt.Scalar_out";
+  static ET_OP_NAME_SPECIFIER const char op_name[] = "lt.Scalar_out";
   return internal::comparison_scalar_out<std::less, op_name>(ctx, a, b, out);
 }
 
diff --git a/kernels/portable/cpu/op_max.cpp b/kernels/portable/cpu/op_max.cpp
index 3f4a1d27c0e..f744b616805 100644
--- a/kernels/portable/cpu/op_max.cpp
+++ b/kernels/portable/cpu/op_max.cpp
@@ -88,8 +88,8 @@ std::tuple<Tensor&, Tensor&> max_out(
     for (const auto out_ix : c10::irange(begin, end)) {
       std::tuple<CTYPE, long> acc = reduce_over_dim<CTYPE>(
           [](CTYPE v, long ix, CTYPE acc_val, long acc_ix) {
-            if (!std::isnan(acc_val) &&
-                (std::isnan(v) || v > acc_val)) {
+            if (!std::isnan(static_cast<double>(acc_val)) &&
+                (std::isnan(static_cast<double>(v)) || v > acc_val)) {
              acc_val = v;
              acc_ix = ix;
            }
@@ -132,7 +132,7 @@ max_unary_out(KernelRuntimeContext& ctx, const Tensor& in, Tensor& out) {
   data_out[0] = lower_bound<CTYPE_OUT>();
   for (const auto i : c10::irange(in.numel())) {
     CTYPE_OUT val = static_cast<CTYPE_OUT>(data_in[i]);
-    if (std::isnan(val)) {
+    if (std::isnan(static_cast<double>(val))) {
       data_out[0] = val;
       break;
     }
diff --git a/kernels/portable/cpu/op_maximum.cpp b/kernels/portable/cpu/op_maximum.cpp
index c7979e40d7c..bc4deecae2f 100644
--- a/kernels/portable/cpu/op_maximum.cpp
+++ b/kernels/portable/cpu/op_maximum.cpp
@@ -42,7 +42,7 @@ Tensor& maximum_out(
   ScalarType compute_type = utils::get_compute_type(common_type);
 
   // @lint-ignore CLANGTIDY facebook-hte-CArray
-  static constexpr const char op_name[] = "maximum.out";
+  static ET_OP_NAME_SPECIFIER const char op_name[] = "maximum.out";
 
   ET_SWITCH_REALB_TYPES(compute_type, ctx, op_name, CTYPE_COMPUTE, [&]() {
     utils::apply_bitensor_elementwise_fn<
diff --git a/kernels/portable/cpu/op_mean.cpp b/kernels/portable/cpu/op_mean.cpp
index 738fa98c9eb..503f680e236 100644
--- a/kernels/portable/cpu/op_mean.cpp
+++ b/kernels/portable/cpu/op_mean.cpp
@@ -47,7 +47,7 @@ Tensor& mean_dim_out(
   MapReduceOverDimListPlan plan(in, dim_list);
 
   // @lint-ignore CLANGTIDY facebook-hte-CArray
-  static constexpr const char op_name[] = "mean.out";
+  static ET_OP_NAME_SPECIFIER const char op_name[] = "mean.out";
   ET_SWITCH_REALHBBF16_TYPES(in.scalar_type(), ctx, op_name, CTYPE_IN, [&] {
     ET_SWITCH_FLOATHBF16_TYPES(out.scalar_type(), ctx, op_name, CTYPE_OUT, [&] {
       CTYPE_OUT* out_data = out.mutable_data_ptr<CTYPE_OUT>();
diff --git a/kernels/portable/cpu/op_min.cpp b/kernels/portable/cpu/op_min.cpp
index 8b70bcd40f5..3ae9cc3c58f 100644
--- a/kernels/portable/cpu/op_min.cpp
+++ b/kernels/portable/cpu/op_min.cpp
@@ -88,8 +88,8 @@ std::tuple<Tensor&, Tensor&> min_out(
     for (const auto out_ix : c10::irange(begin, end)) {
       std::tuple<CTYPE, long> acc = reduce_over_dim<CTYPE>(
           [](CTYPE v, long ix, CTYPE acc_val, long acc_ix) {
-            if (!std::isnan(acc_val) &&
-                (std::isnan(v) || v < acc_val)) {
+            if (!std::isnan(static_cast<double>(acc_val)) &&
+                (std::isnan(static_cast<double>(v)) || v < acc_val)) {
              acc_val = v;
              acc_ix = ix;
            }
@@ -132,7 +132,7 @@ min_unary_out(KernelRuntimeContext& ctx, const Tensor& in, Tensor& out) {
   data_out[0] = upper_bound<CTYPE_OUT>();
   for (const auto i : c10::irange(in.numel())) {
     CTYPE_OUT val = static_cast<CTYPE_OUT>(data_in[i]);
-    if (std::isnan(val)) {
+    if (std::isnan(static_cast<double>(val))) {
       data_out[0] = val;
       break;
     }
diff --git a/kernels/portable/cpu/op_minimum.cpp b/kernels/portable/cpu/op_minimum.cpp
index 1bac23187d8..d96b8d72c98 100644
--- a/kernels/portable/cpu/op_minimum.cpp
+++ b/kernels/portable/cpu/op_minimum.cpp
@@ -42,7 +42,7 @@ Tensor& minimum_out(
   ScalarType compute_type = utils::get_compute_type(common_type);
 
   // @lint-ignore CLANGTIDY facebook-hte-CArray
-  static constexpr const char op_name[] = "minimum.out";
+  static ET_OP_NAME_SPECIFIER const char op_name[] = "minimum.out";
 
   ET_SWITCH_REALB_TYPES(compute_type, ctx, op_name, CTYPE_COMPUTE, [&]() {
     utils::apply_bitensor_elementwise_fn<
diff --git a/kernels/portable/cpu/op_mul.cpp b/kernels/portable/cpu/op_mul.cpp
index 6d4f30106ca..638eb87f83a 100644
--- a/kernels/portable/cpu/op_mul.cpp
+++ b/kernels/portable/cpu/op_mul.cpp
@@ -42,7 +42,7 @@ Tensor& mul_out(
   ScalarType compute_type = utils::get_compute_type(common_type);
 
   // @lint-ignore CLANGTIDY facebook-hte-CArray
-  static constexpr const char op_name[] = "mul.out";
+  static ET_OP_NAME_SPECIFIER const char op_name[] = "mul.out";
 
   ET_KERNEL_CHECK(
       ctx,
@@ -108,7 +108,7 @@ Tensor& mul_scalar_out(
   ScalarType compute_type = utils::get_compute_type(common_type);
 
   // @lint-ignore CLANGTIDY facebook-hte-CArray
-  static constexpr const char op_name[] = "mul.Scalar_out";
+  static ET_OP_NAME_SPECIFIER const char op_name[] = "mul.Scalar_out";
 
   ET_SWITCH_REALB_TYPES(compute_type, ctx, op_name, CTYPE_COMPUTE, [&]() {
     const CTYPE_COMPUTE val_b = utils::scalar_to<CTYPE_COMPUTE>(b);
diff --git a/kernels/portable/cpu/op_native_dropout.cpp b/kernels/portable/cpu/op_native_dropout.cpp
index 8dafd9e0512..24162f77a69 100644
--- a/kernels/portable/cpu/op_native_dropout.cpp
+++ b/kernels/portable/cpu/op_native_dropout.cpp
@@ -45,7 +45,8 @@ std::tuple<Tensor&, Tensor&> native_dropout_out(
       prob);
 
   // @lint-ignore CLANGTIDY facebook-hte-CArray
-  static constexpr const char op_name[] = "native_dropout.out";
+  static ET_OP_NAME_SPECIFIER const char op_name[] = "native_dropout.out";
+
   if ((!train.has_value() || train.value()) && prob != 0) {
     {
       std::mt19937 gen((std::random_device())());
diff --git a/kernels/portable/cpu/op_ne.cpp b/kernels/portable/cpu/op_ne.cpp
index a4b292359df..7878d8ef2e0 100644
--- a/kernels/portable/cpu/op_ne.cpp
+++ b/kernels/portable/cpu/op_ne.cpp
@@ -20,7 +20,7 @@ Tensor& ne_tensor_out(
     const Tensor& b,
     Tensor& out) {
   // @lint-ignore CLANGTIDY facebook-hte-CArray
-  static constexpr const char op_name[] = "ne.Tensor_out";
+  static ET_OP_NAME_SPECIFIER const char op_name[] = "ne.Tensor_out";
   return internal::comparison_tensor_out<std::not_equal_to, op_name>(
       ctx, a, b, out);
 }
@@ -31,7 +31,7 @@ Tensor& ne_scalar_out(
     const Scalar& b,
     Tensor& out) {
   // @lint-ignore CLANGTIDY facebook-hte-CArray
-  static constexpr const char op_name[] = "ne.Scalar_out";
+  static ET_OP_NAME_SPECIFIER const char op_name[] = "ne.Scalar_out";
   return internal::comparison_scalar_out<std::not_equal_to, op_name>(
       ctx, a, b, out);
 }
diff --git a/kernels/portable/cpu/op_neg.cpp b/kernels/portable/cpu/op_neg.cpp
index d184eb873d5..e5e3e801cf9 100644
--- a/kernels/portable/cpu/op_neg.cpp
+++ b/kernels/portable/cpu/op_neg.cpp
@@ -33,7 +33,7 @@ Tensor& neg_out(KernelRuntimeContext& ctx, const Tensor& in, Tensor& out) {
   ET_KERNEL_CHECK(
       ctx, tensors_have_same_dim_order(in, out), InvalidArgument, out);
 
-  static constexpr const char op_name[] = "neg.out";
op_name[] = "neg.out"; ET_SWITCH_REALHBF16_TYPES(in.scalar_type(), ctx, op_name, CTYPE, [&] { utils::internal::apply_unitensor_elementwise_fn< CTYPE, diff --git a/kernels/portable/cpu/op_pow.cpp b/kernels/portable/cpu/op_pow.cpp index aaf934b9adf..b0fc9dad726 100644 --- a/kernels/portable/cpu/op_pow.cpp +++ b/kernels/portable/cpu/op_pow.cpp @@ -50,7 +50,7 @@ Tensor& pow_Tensor_Tensor_out( } // @lint-ignore CLANGTIDY facebook-hte-CArray - static constexpr const char op_name[] = "pow.Tensor_Tensor_out"; + static ET_OP_NAME_SPECIFIER const char op_name[] = "pow.Tensor_Tensor_out"; ET_SWITCH_FLOAT_TYPES(compute_type, ctx, op_name, CTYPE_COMPUTE, [&]() { utils::apply_bitensor_elementwise_fn< @@ -102,7 +102,7 @@ Tensor& pow_Tensor_Scalar_out( } // @lint-ignore CLANGTIDY facebook-hte-CArray - static constexpr const char op_name[] = "pow.Tensor_Scalar_out"; + static ET_OP_NAME_SPECIFIER const char op_name[] = "pow.Tensor_Scalar_out"; ET_SWITCH_FLOAT_TYPES(compute_type, ctx, op_name, CTYPE_COMPUTE, [&]() { const CTYPE_COMPUTE val_b = utils::scalar_to(b); @@ -157,7 +157,7 @@ Tensor& pow_Scalar_out( } // @lint-ignore CLANGTIDY facebook-hte-CArray - static constexpr const char op_name[] = "pow.Scalar_out"; + static ET_OP_NAME_SPECIFIER const char op_name[] = "pow.Scalar_out"; ET_SWITCH_FLOAT_TYPES(compute_type, ctx, op_name, CTYPE_COMPUTE, [&]() { const CTYPE_COMPUTE val_a = utils::scalar_to(a); diff --git a/kernels/portable/cpu/op_relu.cpp b/kernels/portable/cpu/op_relu.cpp index 973542a2a77..2469dfbfdfc 100644 --- a/kernels/portable/cpu/op_relu.cpp +++ b/kernels/portable/cpu/op_relu.cpp @@ -45,7 +45,7 @@ Tensor& relu_out(KernelRuntimeContext& ctx, const Tensor& in, Tensor& out) { ET_SWITCH_REALHBF16_TYPES(in.scalar_type(), ctx, "relu.out", CTYPE, [&]() { apply_unary_map_fn( [](const CTYPE val_in) { - return (std::isnan(val_in) || val_in >= CTYPE(0)) ? val_in : CTYPE(0); + return (std::isnan(static_cast(val_in)) || val_in >= CTYPE(0)) ? 
+          return (std::isnan(static_cast<double>(val_in)) || val_in >= CTYPE(0)) ? val_in : CTYPE(0);
         },
         in.const_data_ptr<CTYPE>(),
         out.mutable_data_ptr<CTYPE>(),
diff --git a/kernels/portable/cpu/op_remainder.cpp b/kernels/portable/cpu/op_remainder.cpp
index 01a5d72de01..141e536c209 100644
--- a/kernels/portable/cpu/op_remainder.cpp
+++ b/kernels/portable/cpu/op_remainder.cpp
@@ -48,7 +48,7 @@ Tensor& remainder_Tensor_out(
   ScalarType compute_type = utils::get_compute_type(common_type);
 
   // @lint-ignore CLANGTIDY facebook-hte-CArray
-  static constexpr const char op_name[] = "remainder.Tensor_out";
+  static ET_OP_NAME_SPECIFIER const char op_name[] = "remainder.Tensor_out";
 
   bool div_by_zero_error = false;
 
@@ -125,7 +125,7 @@ Tensor& remainder_Scalar_out(
   ScalarType compute_type = utils::get_compute_type(common_type);
 
   // @lint-ignore CLANGTIDY facebook-hte-CArray
-  static constexpr const char op_name[] = "remainder.Scalar_out";
+  static ET_OP_NAME_SPECIFIER const char op_name[] = "remainder.Scalar_out";
 
   ET_SWITCH_REAL_TYPES(compute_type, ctx, op_name, CTYPE_COMPUTE, [&]() {
     const CTYPE_COMPUTE val_b = utils::scalar_to<CTYPE_COMPUTE>(b);
diff --git a/kernels/portable/cpu/op_rsub.cpp b/kernels/portable/cpu/op_rsub.cpp
index 6a0a77b6596..5588d9044f0 100644
--- a/kernels/portable/cpu/op_rsub.cpp
+++ b/kernels/portable/cpu/op_rsub.cpp
@@ -47,7 +47,7 @@ Tensor& rsub_scalar_out(
   ScalarType compute_type = utils::get_compute_type(common_type);
 
   // @lint-ignore CLANGTIDY facebook-hte-CArray
-  static constexpr const char op_name[] = "rsub.Scalar_out";
+  static ET_OP_NAME_SPECIFIER const char op_name[] = "rsub.Scalar_out";
 
   ET_SWITCH_REAL_TYPES(compute_type, ctx, op_name, CTYPE_COMPUTE, [&]() {
     const CTYPE_COMPUTE val_b = utils::scalar_to<CTYPE_COMPUTE>(b);
diff --git a/kernels/portable/cpu/op_sigmoid.cpp b/kernels/portable/cpu/op_sigmoid.cpp
index 0578c846ab7..e50171ba433 100644
--- a/kernels/portable/cpu/op_sigmoid.cpp
+++ b/kernels/portable/cpu/op_sigmoid.cpp
@@ -40,7 +40,7 @@ Tensor& sigmoid_out(KernelRuntimeContext& ctx, const Tensor& in, Tensor& out) {
   compute_type = utils::get_compute_type(compute_type);
 
   // @lint-ignore CLANGTIDY facebook-hte-CArray
-  static constexpr const char op_name[] = "sigmoid.out";
+  static ET_OP_NAME_SPECIFIER const char op_name[] = "sigmoid.out";
 
   ET_SWITCH_FLOAT_TYPES(compute_type, ctx, op_name, CTYPE_COMPUTE, [&]() {
     utils::apply_unitensor_elementwise_fn<
diff --git a/kernels/portable/cpu/op_sign.cpp b/kernels/portable/cpu/op_sign.cpp
index e6945094973..9b277964d05 100644
--- a/kernels/portable/cpu/op_sign.cpp
+++ b/kernels/portable/cpu/op_sign.cpp
@@ -42,7 +42,7 @@ Tensor& sign_out(KernelRuntimeContext& ctx, const Tensor& in, Tensor& out) {
   ET_SWITCH_REALHBF16_TYPES(in.scalar_type(), ctx, "sign.out", CTYPE, [&] {
     apply_unary_map_fn(
         [](const CTYPE val_in) {
-          if (std::isnan(val_in)) {
+          if (std::isnan(static_cast<double>(val_in))) {
             return val_in;
           } else {
             return static_cast<CTYPE>((val_in > 0) - (val_in < 0));
diff --git a/kernels/portable/cpu/op_sub.cpp b/kernels/portable/cpu/op_sub.cpp
index b914c411303..fddfa19f49b 100644
--- a/kernels/portable/cpu/op_sub.cpp
+++ b/kernels/portable/cpu/op_sub.cpp
@@ -52,7 +52,7 @@ Tensor& sub_out(
   ScalarType compute_type = utils::get_compute_type(common_type);
 
   // @lint-ignore CLANGTIDY facebook-hte-CArray
-  static constexpr const char op_name[] = "sub.out";
+  static ET_OP_NAME_SPECIFIER const char op_name[] = "sub.out";
 
   ET_SWITCH_REAL_TYPES(compute_type, ctx, op_name, CTYPE_COMPUTE, [&]() {
     const CTYPE_COMPUTE val_alpha = utils::scalar_to<CTYPE_COMPUTE>(alpha);
@@ -107,7 +107,7 @@ Tensor& sub_scalar_out(
   ScalarType compute_type = utils::get_compute_type(common_type);
 
   // @lint-ignore CLANGTIDY facebook-hte-CArray
-  static constexpr const char op_name[] = "sub.Scalar_out";
+  static ET_OP_NAME_SPECIFIER const char op_name[] = "sub.Scalar_out";
 
   ET_SWITCH_REAL_TYPES(compute_type, ctx, op_name, CTYPE_COMPUTE, [&]() {
     const CTYPE_COMPUTE val_b = utils::scalar_to<CTYPE_COMPUTE>(b);
diff --git a/kernels/portable/cpu/op_sum.cpp b/kernels/portable/cpu/op_sum.cpp
index dcd81797dcf..d3f8d0a733b 100644
--- a/kernels/portable/cpu/op_sum.cpp
+++ b/kernels/portable/cpu/op_sum.cpp
@@ -51,7 +51,7 @@ Tensor& sum_dim_out(
     plan.emplace(in, dim_list);
   }
   // @lint-ignore CLANGTIDY facebook-hte-CArray
-  static constexpr const char op_name[] = "sum.IntList_out";
+  static ET_OP_NAME_SPECIFIER const char op_name[] = "sum.IntList_out";
 
   if (executorch::runtime::isComplexType(in.scalar_type())) {
     ET_KERNEL_CHECK(
diff --git a/kernels/portable/cpu/op_topk.cpp b/kernels/portable/cpu/op_topk.cpp
index e35e67193bf..458f8c2698a 100644
--- a/kernels/portable/cpu/op_topk.cpp
+++ b/kernels/portable/cpu/op_topk.cpp
@@ -62,7 +62,7 @@ bool float_less_than(T x, T y) {
   if constexpr (std::is_integral_v<T>) {
     return x < y;
   }
-  return (!std::isnan(x) && std::isnan(y)) || x < y;
+  return (!std::isnan(static_cast<double>(x)) && std::isnan(static_cast<double>(y))) || x < y;
 }
 
 template <typename CTYPE, typename elem_t = std::pair<CTYPE, int64_t>>
diff --git a/kernels/portable/cpu/op_tril.cpp b/kernels/portable/cpu/op_tril.cpp
index b21c9918a99..5fb43928883 100644
--- a/kernels/portable/cpu/op_tril.cpp
+++ b/kernels/portable/cpu/op_tril.cpp
@@ -38,8 +38,8 @@ Tensor& clear_out(Tensor& out) {
  */
 template <typename CTYPE>
 void apply_tril(
-    CTYPE* __restrict__ self,
-    CTYPE* __restrict__ out,
+    CTYPE* ET_RESTRICT self,
+    CTYPE* ET_RESTRICT out,
     int64_t diagonal,
     int64_t num_rows,
     int64_t num_cols,
@@ -104,8 +104,8 @@ void tril_kernel(
   int64_t col_stride = strides_ref[ndim - 1];
 
   for (const auto i : c10::irange(batch_size)) {
-    CTYPE* __restrict__ data_self_ptr = &data_self[i * self_stride];
-    CTYPE* __restrict__ data_out_ptr = &data_out[i * self_stride];
+    CTYPE* ET_RESTRICT data_self_ptr = &data_self[i * self_stride];
+    CTYPE* ET_RESTRICT data_out_ptr = &data_out[i * self_stride];
 
     apply_tril<CTYPE>(
         data_self_ptr,
diff --git a/kernels/portable/cpu/op_where.cpp b/kernels/portable/cpu/op_where.cpp
index b1eb4ff442c..400076f7347 100644
--- a/kernels/portable/cpu/op_where.cpp
+++ b/kernels/portable/cpu/op_where.cpp
@@ -40,7 +40,7 @@ Tensor& where_out(
   ScalarType compute_type = utils::get_compute_type(common_type);
 
   // @lint-ignore CLANGTIDY facebook-hte-CArray
-  static constexpr const char op_name[] = "where.self_out";
+  static ET_OP_NAME_SPECIFIER const char op_name[] = "where.self_out";
 
   ET_SWITCH_REALB_TYPES(compute_type, ctx, op_name, CTYPE_COMPUTE, [&]() {
     utils::apply_tritensor_elementwise_fn<
diff --git a/kernels/portable/cpu/targets.bzl b/kernels/portable/cpu/targets.bzl
index 2d4848f9588..83e8219402f 100644
--- a/kernels/portable/cpu/targets.bzl
+++ b/kernels/portable/cpu/targets.bzl
@@ -43,6 +43,7 @@ def define_common_targets():
         name = "vec_ops",
         exported_deps = [
             "//executorch/runtime/core/portable_type/c10/c10:c10",
+            "//executorch/runtime/platform:compiler",
         ],
         srcs = [],
         exported_headers = ["vec_ops.h"],
diff --git a/kernels/portable/cpu/util/elementwise_util.h b/kernels/portable/cpu/util/elementwise_util.h
index cc1110e10d7..69ffc0bc647 100644
--- a/kernels/portable/cpu/util/elementwise_util.h
+++ b/kernels/portable/cpu/util/elementwise_util.h
@@ -531,7 +531,7 @@ inline void apply_tritensor_elementwise_fn(
  * can't pass a string literal for op_name. Instead, you should do the
  * following:
  *
- * static constexpr const char op_name[] = "my_op";
+ * static ET_OP_NAME_SPECIFIER const char op_name[] = "my_op";
  * apply_ternary_elementwise_fn<CTYPE, op_name>.
  *
  * See [NOTE: Generic lambdas] if you want to pass a generic lambda for
diff --git a/kernels/portable/cpu/util/math_util.h b/kernels/portable/cpu/util/math_util.h
index 2c4828b9e6e..61cab3cb127 100644
--- a/kernels/portable/cpu/util/math_util.h
+++ b/kernels/portable/cpu/util/math_util.h
@@ -29,7 +29,7 @@ template <
     typename std::enable_if<std::is_integral<INT_T>::value, bool>::type = true>
 INT_T floor_divide(INT_T a, INT_T b) {
   const auto quot = a / b;
-  if (std::signbit(a) == std::signbit(b)) {
+  if (std::signbit(static_cast<double>(a)) == std::signbit(static_cast<double>(b))) {
     return quot;
   }
   const auto rem = a % b;
diff --git a/kernels/portable/cpu/vec_ops.h b/kernels/portable/cpu/vec_ops.h
index 7a1a488701b..1a629fa6c89 100644
--- a/kernels/portable/cpu/vec_ops.h
+++ b/kernels/portable/cpu/vec_ops.h
@@ -18,6 +18,7 @@
 #include <cstdint>
 #include <cstring>
 #include <c10/util/irange.h>
+#include <executorch/runtime/platform/compiler.h>
 /**
  * @file
  * This header defines common, low-level operations that can often be
@@ -45,9 +46,9 @@ inline float vec_maxf(const float* x, size_t size) {
 /// Add each element of `x` and `y` into the corresponding element of `z`. All
 /// arrays must have `size` elements.
 inline void vec_addf(
-    float* __restrict__ z,
-    const float* __restrict__ x,
-    const float* __restrict__ y,
+    float* ET_RESTRICT z,
+    const float* ET_RESTRICT x,
+    const float* ET_RESTRICT y,
     size_t size) {
   for (const auto i : c10::irange(size)) {
     z[i] = x[i] + y[i];
@@ -57,8 +58,8 @@ inline void vec_addf(
 /// Multiplies every element of `x` by `scale`, and writes the result into the
 /// corresponding element of `y`. `x` and `y` must have `size` elements.
 inline void vec_scalef(
-    float* __restrict__ y,
-    const float* __restrict__ x,
+    float* ET_RESTRICT y,
+    const float* ET_RESTRICT x,
     float scale,
     size_t size) {
   for (const auto i : c10::irange(size)) {
@@ -70,9 +71,9 @@ inline void vec_scalef(
 /// z[i][j] = sum(x[i][k] * y[k][j])
 template <typename T, typename U = T>
 inline void vec_matmul(
-    T* __restrict__ z,
-    const U* __restrict__ x,
-    const U* __restrict__ y,
+    T* ET_RESTRICT z,
+    const U* ET_RESTRICT x,
+    const U* ET_RESTRICT y,
     int64_t m,
     int64_t n,
     int64_t p) {
@@ -89,10 +90,10 @@ inline void vec_matmul(
 
 template <typename T, typename U = T>
 inline void vec_quantized_matmul_int8(
-    T* __restrict__ z,
-    const U* __restrict__ x,
-    const int8_t* __restrict__ y,
-    const U* __restrict__ s,
+    T* ET_RESTRICT z,
+    const U* ET_RESTRICT x,
+    const int8_t* ET_RESTRICT y,
+    const U* ET_RESTRICT s,
     int64_t m,
     int64_t n,
     int64_t p) {
@@ -115,10 +116,10 @@ static inline size_t bounds_min(size_t a, size_t b) {
 /// z[i][j] = sum(x[i][k] * y[j][k] * s[j][k/g])
 template <typename T, typename U, typename V>
 inline void vec_quantized_matmul_transb_int8(
-    T* __restrict__ z,
-    const U* __restrict__ x,
-    const int8_t* __restrict__ y,
-    const V* __restrict__ s,
+    T* ET_RESTRICT z,
+    const U* ET_RESTRICT x,
+    const int8_t* ET_RESTRICT y,
+    const V* ET_RESTRICT s,
     int64_t m,
     int64_t n,
     int64_t p,
@@ -146,10 +147,10 @@ inline void vec_quantized_matmul_transb_int8(
 // T for tensor dtype, U for scalar type
 template <typename T, typename U>
 inline void vec_addmm(
-    T* __restrict__ out_data,
-    const T* __restrict__ self_data,
-    const T* __restrict__ mat1_data,
-    const T* __restrict__ mat2_data,
+    T* ET_RESTRICT out_data,
+    const T* ET_RESTRICT self_data,
+    const T* ET_RESTRICT mat1_data,
+    const T* ET_RESTRICT mat2_data,
     int64_t m,
     int64_t n,
     int64_t p,
@@ -195,7 +196,7 @@ template <
     typename checkU = typename std::enable_if<
         std::is_same<float, typename std::remove_cv<U>::type>::value ||
         std::is_same<double, typename std::remove_cv<U>::type>::value>::type>
-inline void vec_softmax(T* __restrict__ y, const U* __restrict__ x, int n) {
+inline void vec_softmax(T* ET_RESTRICT y, const U* ET_RESTRICT x, int n) {
   U max_x = *std::max_element(x, x + n);
   T sum = 0;
 
@@ -223,8 +224,8 @@ constexpr const T& clamp(const T& v, const T& lo, const T& hi) {
 /// Quantizes the elements of `x` into `y`, both of which must have `size`
 /// elements. Inverse of `dequantize_i8_f32()`.
 inline void quantize_i8_f32(
-    int8_t* __restrict__ y,
-    const float* __restrict__ x,
+    int8_t* ET_RESTRICT y,
+    const float* ET_RESTRICT x,
     float scale,
     int32_t zero_point,
     size_t size) {
@@ -237,8 +238,8 @@ inline void quantize_i8_f32(
 /// Dequantizes the elements of `x` into `y`, both of which must have `size`
 /// elements. Inverse of `quantize_i8_f32()`.
 inline void dequantize_i8_f32(
-    float* __restrict__ y,
-    const int8_t* __restrict__ x,
+    float* ET_RESTRICT y,
+    const int8_t* ET_RESTRICT x,
     float scale,
     int32_t zero_point,
     size_t size) {
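
---

Note for reviewers: ET_OP_NAME_SPECIFIER and ET_RESTRICT come from
executorch/runtime/platform/compiler.h, which this patch adds as a dependency
of vec_ops; that header is the source of truth for their definitions, which
are not part of this diff. As a rough, hypothetical sketch of the kind of
definitions the ops above assume:

    // Hypothetical sketch only -- the real definitions live in
    // executorch/runtime/platform/compiler.h.
    #if defined(_MSC_VER)
    // Some MSVC versions reject `static constexpr` char arrays in the way
    // the op libraries use them (e.g. as non-type template arguments), so
    // expand to nothing and leave a plain `static const` array.
    #define ET_OP_NAME_SPECIFIER
    // MSVC spells the restrict qualifier without trailing underscores.
    #define ET_RESTRICT __restrict
    #else
    #define ET_OP_NAME_SPECIFIER constexpr
    #define ET_RESTRICT __restrict__
    #endif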