1 change: 1 addition & 0 deletions kernels/optimized/CMakeLists.txt
@@ -24,6 +24,7 @@ endif()
set(_common_compile_options
$<$<CXX_COMPILER_ID:MSVC>:/wd4996>
$<$<NOT:$<CXX_COMPILER_ID:MSVC>>:-Wno-deprecated-declarations>
$<$<CXX_COMPILER_ID:GNU>:-Wno-psabi>
)

# Note for apple platform we can rely on Accelerate framework Will come back to
6 changes: 3 additions & 3 deletions kernels/optimized/cpu/op_add.cpp
@@ -67,7 +67,7 @@ Tensor& opt_add_out(
CTYPE b_val = *b.const_data_ptr<CTYPE>();

using Vec = at::vec::Vectorized<CTYPE>;
at::vec::map<CTYPE>(
at::vec::map(
[alpha_val, b_val](Vec x) { return x + Vec(alpha_val * b_val); },
out.mutable_data_ptr<CTYPE>(),
a.const_data_ptr<CTYPE>(),
@@ -86,7 +86,7 @@ Tensor& opt_add_out(
CTYPE b_casted = static_cast<CTYPE>(b_val);

using Vec = at::vec::Vectorized<CTYPE>;
at::vec::map<CTYPE>(
at::vec::map(
[alpha_val, b_casted](Vec x) {
return x + Vec(alpha_val * b_casted);
},
@@ -140,7 +140,7 @@ Tensor& opt_add_scalar_out(
ctx, utils::extract_scalar(alpha, &alpha_val), InvalidArgument, );

using Vec = at::vec::Vectorized<CTYPE>;
at::vec::map<CTYPE>(
at::vec::map(
[alpha_val, b_casted](Vec x) {
return x + Vec(alpha_val * b_casted);
},
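Note: the recurring change in this file (and in the other optimized kernels below) drops the explicit <CTYPE> template argument from at::vec::map/map2/map3; the scalar type can already be deduced from the output and input pointer arguments, so spelling it out was redundant. A minimal standalone sketch of the same deduction, using a scalar stand-in rather than the real ATen header (the map function below is illustrative, not at::vec::map itself):

#include <cstddef>
#include <iostream>

// Stand-in for at::vec::map: T is deduced from the pointer arguments, so the
// call site does not need to name the element type.
template <typename T, typename Op>
void map(const Op& fn, T* out, const T* in, std::size_t size) {
  for (std::size_t i = 0; i < size; ++i) {
    out[i] = fn(in[i]);  // the real at::vec::map applies fn to Vectorized<T> chunks
  }
}

int main() {
  float a[4] = {1.f, 2.f, 3.f, 4.f};
  float out[4];
  float alpha_times_b = 0.5f;
  // Mirrors the edited call sites: no explicit <float>, the type comes from out/a.
  map([alpha_times_b](float x) { return x + alpha_times_b; }, out, a, 4);
  for (float v : out) std::cout << v << ' ';
  std::cout << '\n';
  return 0;
}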
8 changes: 4 additions & 4 deletions kernels/optimized/cpu/op_div.cpp
@@ -86,14 +86,14 @@ Tensor& opt_div_out(

using Vec = at::vec::Vectorized<CTYPE>;
if (a.numel() == 1) {
at::vec::map<CTYPE>(
at::vec::map(
[scalar_casted](Vec x) { return Vec(scalar_casted) / x; },
out.mutable_data_ptr<CTYPE>(),
tensor->const_data_ptr<CTYPE>(),
out.numel());
} else {
Vec inv_scalar_casted_vec(CTYPE(1) / scalar_casted);
at::vec::map<CTYPE>(
at::vec::map(
[inv_scalar_casted_vec](Vec x) {
return x * inv_scalar_casted_vec;
},
@@ -111,7 +111,7 @@ Tensor& opt_div_out(
if (selected_optimized_path == ElementwiseOptimizedPath::kTreatAs1d) {
ET_SWITCH_REALB_TYPES(out_type, ctx, op_name, CTYPE, [&]() {
using Vec = at::vec::Vectorized<CTYPE>;
at::vec::map2<CTYPE>(
at::vec::map2(
[](Vec x, Vec y) { return x / y; },
out.mutable_data_ptr<CTYPE>(),
a.const_data_ptr<CTYPE>(),
@@ -193,7 +193,7 @@ Tensor& opt_div_scalar_out(

using Vec = at::vec::Vectorized<CTYPE>;
Vec inv_b_casted_vec(CTYPE(1) / b_casted);
at::vec::map<CTYPE>(
at::vec::map(
[inv_b_casted_vec](Vec x) { return x * inv_b_casted_vec; },
out.mutable_data_ptr<CTYPE>(),
a.const_data_ptr<CTYPE>(),
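Note: unchanged context worth calling out in this hunk: when the divisor is a scalar, the kernel computes the reciprocal once (inv_scalar_casted_vec) and maps a multiply over the tensor, since vector multiplication is generally cheaper than division, at a small cost in floating-point exactness. A scalar sketch of that transformation, without the ATen types:

#include <iostream>

int main() {
  float a[4] = {2.f, 4.f, 6.f, 8.f};
  float out[4];
  float b = 4.f;
  float inv_b = 1.f / b;     // computed once, like inv_scalar_casted_vec
  for (int i = 0; i < 4; ++i) {
    out[i] = a[i] * inv_b;   // multiply by the reciprocal instead of dividing each element
  }
  for (float v : out) std::cout << v << ' ';  // prints: 0.5 1 1.5 2
  std::cout << '\n';
  return 0;
}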
2 changes: 1 addition & 1 deletion kernels/optimized/cpu/op_elu.cpp
@@ -41,7 +41,7 @@ void elu(
0,
out.numel(),
::executorch::extension::internal::GRAIN_SIZE,
[&](const auto begin, const auto end) {
[&](const auto& begin, const auto& end) {
using Vec = at::vec::Vectorized<CTYPE>;
const auto vectorized_begin =
begin + (Vec::size() - begin % Vec::size()) % Vec::size();
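Note: the only change here is the signature of the lambda handed to the parallel range (const auto& instead of by value). The surrounding context is the interesting part: vectorized_begin rounds begin up to the next multiple of Vec::size(), so the unaligned head of the range is handled with scalar code and the aligned middle with Vectorized loads. A standalone check of that rounding formula; the width 8 is only an example, since Vec::size() depends on CTYPE and the target ISA:

#include <cstdint>
#include <iostream>

int main() {
  const std::int64_t vec_size = 8;  // example width
  for (std::int64_t begin : {0, 1, 7, 8, 9, 17}) {
    std::int64_t vectorized_begin =
        begin + (vec_size - begin % vec_size) % vec_size;
    std::cout << begin << " -> " << vectorized_begin << '\n';
  }
  // Prints 0->0, 1->8, 7->8, 8->8, 9->16, 17->24: begin rounded up to a multiple of vec_size.
  return 0;
}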
3 changes: 1 addition & 2 deletions kernels/optimized/cpu/op_exp.cpp
@@ -35,8 +35,7 @@ void exp_data(
const size_t numel,
CTYPE_OUT* out_data) {
using Vec = at::vec::Vectorized<CTYPE_IN>;
at::vec::map<CTYPE_IN>(
[](Vec x) { return x.exp(); }, out_data, in_data, numel);
at::vec::map([](Vec x) { return x.exp(); }, out_data, in_data, numel);
}

/**
4 changes: 2 additions & 2 deletions kernels/optimized/cpu/op_le.cpp
@@ -47,7 +47,7 @@ Tensor& opt_le_tensor_out(
if (selected_optimized_path == ElementwiseOptimizedPath::kTreatAs1d) {
ET_SWITCH_REALB_TYPES(a_type, ctx, op_name, CTYPE, [&]() {
using Vec = at::vec::Vectorized<CTYPE>;
at::vec::map2<CTYPE>(
at::vec::map2(
[](Vec x, Vec y) { return x.le(y); },
out.mutable_data_ptr<CTYPE>(),
a.const_data_ptr<CTYPE>(),
@@ -95,7 +95,7 @@ Tensor& opt_le_scalar_out(
ET_EXTRACT_SCALAR(b, b_val);
CTYPE b_casted = static_cast<CTYPE>(b_val);
using Vec = at::vec::Vectorized<CTYPE>;
at::vec::map<CTYPE>(
at::vec::map(
[b_casted](Vec x) { return x.le(Vec(b_casted)); },
out.mutable_data_ptr<CTYPE>(),
a.const_data_ptr<CTYPE>(),
2 changes: 1 addition & 1 deletion kernels/optimized/cpu/op_log_softmax.cpp
@@ -55,7 +55,7 @@ void log_softmax_kernel(const Tensor& input, int64_t dim, Tensor& out) {
0,
outer_size,
::executorch::extension::internal::GRAIN_SIZE,
[&](const auto begin, const auto end) {
[&](const auto& begin, const auto& end) {
at::native::serial_vec_log_softmax_lastdim_range(
input_data_base,
output_data_base,
8 changes: 4 additions & 4 deletions kernels/optimized/cpu/op_mul.cpp
@@ -55,7 +55,7 @@ Tensor& opt_mul_out(
CTYPE b_casted = static_cast<CTYPE>(b_val);

using Vec = at::vec::Vectorized<CTYPE>;
at::vec::map<CTYPE>(
at::vec::map(
[b_casted](Vec x) { return x * Vec(b_casted); },
out.mutable_data_ptr<CTYPE>(),
a.const_data_ptr<CTYPE>(),
@@ -76,7 +76,7 @@ Tensor& opt_mul_out(

ET_SWITCH_COMPLEXH_TYPES(out_type, ctx, op_name, CTYPE, [&]() {
using Vec = at::vec::Vectorized<CTYPE>;
at::vec::map2<CTYPE>(
at::vec::map2(
[](Vec x, Vec y) { return x * y; },
out.mutable_data_ptr<CTYPE>(),
a.const_data_ptr<CTYPE>(),
@@ -86,7 +86,7 @@ Tensor& opt_mul_out(
} else {
ET_SWITCH_REALB_TYPES(out_type, ctx, op_name, CTYPE, [&]() {
using Vec = at::vec::Vectorized<CTYPE>;
at::vec::map2<CTYPE>(
at::vec::map2(
[](Vec x, Vec y) { return x * y; },
out.mutable_data_ptr<CTYPE>(),
a.const_data_ptr<CTYPE>(),
@@ -173,7 +173,7 @@ Tensor& opt_mul_scalar_out(
CTYPE b_casted = utils::scalar_to<CTYPE>(b);

using Vec = at::vec::Vectorized<CTYPE>;
at::vec::map<CTYPE>(
at::vec::map(
[b_casted](Vec x) { return x * Vec(b_casted); },
out.mutable_data_ptr<CTYPE>(),
a.const_data_ptr<CTYPE>(),
2 changes: 1 addition & 1 deletion kernels/optimized/cpu/op_native_layer_norm.cpp
@@ -91,7 +91,7 @@ void layer_norm(
dst_ptr[j] = (src_ptr[j] * scale + offset) * gamma_v + beta_v;
}
} else {
at::vec::map3<CTYPE>(
at::vec::map3(
[scale, offset](auto x, auto gamma, auto beta) {
using Vec = decltype(x);
return (x * Vec(scale) + Vec(offset)) * gamma + beta;
6 changes: 3 additions & 3 deletions kernels/optimized/cpu/op_sub.cpp
@@ -85,15 +85,15 @@ Tensor& opt_sub_out(

using Vec = at::vec::Vectorized<CTYPE>;
if (a.numel() == 1) {
at::vec::map<CTYPE>(
at::vec::map(
[alpha_val, scalar_casted](Vec x) {
return Vec(scalar_casted) - Vec(alpha_val) * x;
},
out.mutable_data_ptr<CTYPE>(),
tensor->const_data_ptr<CTYPE>(),
out.numel());
} else {
at::vec::map<CTYPE>(
at::vec::map(
[alpha_val, scalar_casted](Vec x) {
return x - Vec(alpha_val * scalar_casted);
},
@@ -148,7 +148,7 @@ Tensor& opt_sub_scalar_out(
ctx, utils::extract_scalar(alpha, &alpha_val), InvalidArgument, );

using Vec = at::vec::Vectorized<CTYPE>;
at::vec::map<CTYPE>(
at::vec::map(
[alpha_val, b_casted](Vec x) {
return x - Vec(alpha_val * b_casted);
},
4 changes: 2 additions & 2 deletions kernels/portable/cpu/op_add.cpp
@@ -80,7 +80,7 @@ Tensor& add_out(
CTYPE_COMPUTE,
op_name,
utils::SupportedTensorDtypes::REALHBBF16>(
[val_alpha](const auto val_a, const auto val_b) {
[val_alpha](const auto& val_a, const auto& val_b) {
return val_a + val_alpha * val_b;
},
ctx,
@@ -136,7 +136,7 @@ Tensor& add_scalar_out(
CTYPE_COMPUTE,
op_name,
utils::SupportedTensorDtypes::SAME_AS_COMMON>(
[val_alpha_times_b](const auto val_a) {
[val_alpha_times_b](const auto& val_a) {
// Cast here supports vectorization; either it does nothing
// or it casts from CTYPE_COMPUTE to
// Vectorized<CTYPE_COMPUTE>.
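Note: in the portable kernels the lambdas handed to the elementwise helpers are generic, and the same lambda body may be instantiated with plain CTYPE_COMPUTE values or with at::vec::Vectorized<CTYPE_COMPUTE> chunks; that is what the in-diff comment about the cast doing nothing or casting to Vectorized refers to, and taking the parameters by const reference presumably avoids copying the vector chunks. A minimal sketch of the idea with a toy vector type (Vec4 and add_fn below are illustrative, not the ExecuTorch utilities):

#include <array>
#include <iostream>
#include <type_traits>

// Toy stand-in for at::vec::Vectorized<float>: broadcast constructor plus + and *.
struct Vec4 {
  std::array<float, 4> v{};
  explicit Vec4(float s) { v.fill(s); }
  friend Vec4 operator+(Vec4 a, const Vec4& b) {
    for (int i = 0; i < 4; ++i) a.v[i] += b.v[i];
    return a;
  }
  friend Vec4 operator*(Vec4 a, const Vec4& b) {
    for (int i = 0; i < 4; ++i) a.v[i] *= b.v[i];
    return a;
  }
};

int main() {
  const float val_alpha = 2.f;
  // One generic lambda; the cast broadcasts val_alpha when T is a vector type.
  auto add_fn = [val_alpha](const auto& val_a, const auto& val_b) {
    using T = std::decay_t<decltype(val_b)>;
    return val_a + T(val_alpha) * val_b;
  };
  std::cout << add_fn(1.f, 3.f) << '\n';   // scalar instantiation: prints 7
  Vec4 r = add_fn(Vec4(1.f), Vec4(3.f));   // vector instantiation
  std::cout << r.v[0] << '\n';             // 7 in every lane
  return 0;
}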
2 changes: 1 addition & 1 deletion kernels/portable/cpu/op_addmm.cpp
@@ -92,7 +92,7 @@ Tensor& addmm_out(
CTYPE,
op_name,
utils::SupportedTensorDtypes::REALHBF16>(
[alpha_val, beta_val](const auto val_a, const auto val_b) {
[alpha_val, beta_val](const auto& val_a, const auto& val_b) {
return val_a * alpha_val + val_b * beta_val;
},
ctx,
2 changes: 1 addition & 1 deletion kernels/portable/cpu/op_atan2.cpp
@@ -59,7 +59,7 @@ Tensor& atan2_out(
CTYPE_COMPUTE,
op_name,
utils::SupportedTensorDtypes::FLOATHBF16>(
[](const auto val_a, const auto val_b) {
[](const auto& val_a, const auto& val_b) {
return executorch::math::atan2(val_a, val_b);
},
ctx,
2 changes: 1 addition & 1 deletion kernels/portable/cpu/op_clamp.cpp
@@ -139,7 +139,7 @@ Tensor& clamp_out(
CTYPE_COMPUTE,
op_name,
utils::SupportedTensorDtypes::SAME_AS_COMMON>(
[has_min, min_opt, has_max, max_opt](const auto val_in) {
[has_min, min_opt, has_max, max_opt](const auto& val_in) {
auto val_out = val_in;
if (has_min) {
val_out = utils::max_override(
4 changes: 2 additions & 2 deletions kernels/portable/cpu/op_div.cpp
@@ -62,7 +62,7 @@ Tensor& div_out(
CTYPE_COMPUTE,
op_name,
utils::SupportedTensorDtypes::FLOATHBF16>(
[](const auto val_a, const auto val_b) { return val_a / val_b; },
[](const auto& val_a, const auto& val_b) { return val_a / val_b; },
ctx,
a,
utils::SupportedTensorDtypes::REALHBBF16,
@@ -195,7 +195,7 @@ Tensor& div_scalar_out(
CTYPE_COMPUTE,
op_name,
utils::SupportedTensorDtypes::SAME_AS_COMMON>(
[val_b](const auto val_a) { return val_a / val_b; },
[val_b](const auto& val_a) { return val_a / val_b; },
ctx,
a,
utils::SupportedTensorDtypes::REALHBBF16,
2 changes: 1 addition & 1 deletion kernels/portable/cpu/op_fmod.cpp
@@ -138,7 +138,7 @@ Tensor& fmod_Scalar_out(
CTYPE_COMPUTE,
op_name,
utils::SupportedTensorDtypes::REALHBF16>(
[val_b](const auto val_a) {
[val_b](const auto& val_a) {
return executorch::math::fmod(val_a, (decltype(val_a))val_b);
},
ctx,
13 changes: 12 additions & 1 deletion kernels/portable/cpu/op_isinf.cpp
@@ -14,7 +14,18 @@ namespace torch {
namespace executor {
namespace native {

DEFINE_UNARY_UFUNC_REALHBBF16_TO_BOOL(isinf_out, std::isinf)
bool isinf_float(float x) {
return std::isinf(x);
}

bool isinf_double(double x) {
return std::isinf(x);
}

Tensor& isinf_out(KernelRuntimeContext& ctx, const Tensor& in, Tensor& out) {
return internal::unary_ufunc_realhbbf16_to_bool(
isinf_float, isinf_double, ctx, in, out);
}

} // namespace native
} // namespace executor
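Note: this file (and op_isnan.cpp below) replaces the DEFINE_UNARY_UFUNC_REALHBBF16_TO_BOOL macro with explicit per-type wrappers passed to internal::unary_ufunc_realhbbf16_to_bool. One likely reason for spelling the wrappers out is that std::isinf is an overload set, and standard-library functions are not guaranteed to be addressable, so a named single-type function gives the helper an unambiguous bool(*)(float) and bool(*)(double) to call. A standalone sketch of that pattern; apply_fn here is a stand-in, not the ExecuTorch helper:

#include <cmath>
#include <iostream>
#include <limits>

// Stand-in for a helper that wants one concrete function pointer per dtype.
void apply_fn(bool (*fn_float)(float), bool (*fn_double)(double)) {
  std::cout << fn_float(std::numeric_limits<float>::infinity()) << ' '
            << fn_double(2.5) << '\n';  // prints: 1 0
}

// Named single-type wrappers, mirroring isinf_float / isinf_double in the diff.
bool isinf_float(float x) { return std::isinf(x); }
bool isinf_double(double x) { return std::isinf(x); }

int main() {
  apply_fn(isinf_float, isinf_double);
  return 0;
}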
12 changes: 11 additions & 1 deletion kernels/portable/cpu/op_isnan.cpp
@@ -13,8 +13,18 @@
namespace torch {
namespace executor {
namespace native {
bool isnan_float(float x) {
return std::isnan(x);
}

DEFINE_UNARY_UFUNC_REALHBBF16_TO_BOOL(isnan_out, std::isnan)
bool isnan_double(double x) {
return std::isnan(x);
}

Tensor& isnan_out(KernelRuntimeContext& ctx, const Tensor& in, Tensor& out) {
return internal::unary_ufunc_realhbbf16_to_bool(
isnan_float, isnan_double, ctx, in, out);
}

} // namespace native
} // namespace executor
2 changes: 1 addition & 1 deletion kernels/portable/cpu/op_maximum.cpp
@@ -49,7 +49,7 @@ Tensor& maximum_out(
CTYPE_COMPUTE,
op_name,
utils::SupportedTensorDtypes::REALHBBF16>(
[](const auto val_a, const auto val_b) {
[](const auto& val_a, const auto& val_b) {
return utils::max_override(val_a, val_b);
},
ctx,
4 changes: 2 additions & 2 deletions kernels/portable/cpu/op_mul.cpp
@@ -72,7 +72,7 @@ Tensor& mul_out(
CTYPE_COMPUTE,
op_name,
utils::SupportedTensorDtypes::REALHBBF16>(
[](const auto val_a, const auto val_b) { return val_a * val_b; },
[](const auto& val_a, const auto& val_b) { return val_a * val_b; },
ctx,
a,
utils::SupportedTensorDtypes::REALHBBF16,
@@ -116,7 +116,7 @@ Tensor& mul_scalar_out(
CTYPE_COMPUTE,
op_name,
utils::SupportedTensorDtypes::SAME_AS_COMMON>(
[val_b](const auto val_a) { return val_a * val_b; },
[val_b](const auto& val_a) { return val_a * val_b; },
ctx,
a,
utils::SupportedTensorDtypes::REALHBBF16,
2 changes: 1 addition & 1 deletion kernels/portable/cpu/op_neg.cpp
@@ -39,7 +39,7 @@ Tensor& neg_out(KernelRuntimeContext& ctx, const Tensor& in, Tensor& out) {
CTYPE,
op_name,
utils::SupportedTensorDtypes::SAME_AS_COMMON>(
[](const auto val_in) { return -val_in; },
[](const auto& val_in) { return -val_in; },
ctx,
in,
utils::SupportedTensorDtypes::REALHBF16,
2 changes: 1 addition & 1 deletion kernels/portable/cpu/op_pow.cpp
@@ -57,7 +57,7 @@ Tensor& pow_Tensor_Tensor_out(
CTYPE_COMPUTE,
op_name,
utils::SupportedTensorDtypes::REALHBF16>(
[](const auto val_a, const auto val_b) {
[](const auto& val_a, const auto& val_b) {
return executorch::math::pow(val_a, val_b);
},
ctx,
2 changes: 1 addition & 1 deletion kernels/portable/cpu/op_rsub.cpp
@@ -56,7 +56,7 @@ Tensor& rsub_scalar_out(
CTYPE_COMPUTE,
op_name,
utils::SupportedTensorDtypes::SAME_AS_COMMON>(
[val_b, val_alpha](const auto val_a) {
[val_b, val_alpha](const auto& val_a) {
return val_b - val_alpha * val_a;
},
ctx,
2 changes: 1 addition & 1 deletion kernels/portable/cpu/op_sigmoid.cpp
@@ -47,7 +47,7 @@ Tensor& sigmoid_out(KernelRuntimeContext& ctx, const Tensor& in, Tensor& out) {
CTYPE_COMPUTE,
op_name,
utils::SupportedTensorDtypes::FLOATHBF16>(
[](const auto val_in) {
[](const auto& val_in) {
const auto one = static_cast<decltype(val_in)>(1.0);
auto out_val = one / (one + executorch::math::exp(-val_in));
return out_val;
2 changes: 1 addition & 1 deletion kernels/portable/cpu/op_sub.cpp
@@ -60,7 +60,7 @@ Tensor& sub_out(
CTYPE_COMPUTE,
op_name,
utils::SupportedTensorDtypes::REALHBF16>(
[val_alpha](const auto val_a, const auto val_b) {
[val_alpha](const auto& val_a, const auto& val_b) {
return val_a - (decltype(val_b))(val_alpha)*val_b;
},
ctx,