4 changes: 4 additions & 0 deletions .lintrunner.toml
@@ -271,6 +271,10 @@ exclude_patterns = [
     'examples/**',
     'exir/verification/bindings.cpp',
     'extension/**',
+    # Uses properly-gated (ET_USE_PYTORCH_HEADERS) ATen include.
+    'kernels/portable/cpu/util/elementwise_util.h',
+    'kernels/portable/cpu/util/math_util.h',
+    'kernels/portable/cpu/util/vectorized_math.h',
     'kernels/optimized/**',
     'runtime/core/exec_aten/**',
     # Want to be able to keep c10 in sync with PyTorch core.
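For context, the "properly-gated" include referenced by the new exclusion comment follows this pattern (a simplified sketch, assuming the utility headers look roughly like this; not their exact contents):

    // Sketch of a gated ATen include; illustrative only.
    #if defined(ET_USE_PYTORCH_HEADERS) && ET_USE_PYTORCH_HEADERS
    #include <ATen/cpu/vec/vec.h> // brings in at::vec::Vectorized<T>
    #endif

Because the check is on the macro's value, the build-system change below can turn these includes on only when exceptions are available.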
2 changes: 1 addition & 1 deletion kernels/optimized/CMakeLists.txt
@@ -60,7 +60,7 @@ message("Generated files ${gen_command_sources}")
 list(TRANSFORM _optimized_kernels__srcs PREPEND "${EXECUTORCH_ROOT}/")
 add_library(optimized_kernels ${_optimized_kernels__srcs})
 target_include_directories(optimized_kernels PRIVATE ${TORCH_INCLUDE_DIRS} "${EXECUTORCH_ROOT}/third-party/pocketfft")
-target_compile_definitions(optimized_kernels PRIVATE ET_USE_PYTORCH_HEADERS)
+target_compile_definitions(optimized_kernels PRIVATE "ET_USE_PYTORCH_HEADERS=ET_HAS_EXCEPTIONS")
 target_link_libraries(
   optimized_kernels PUBLIC executorch_core cpublas extension_threadpool kernels_util_all_deps
 )
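ATen's vectorization headers can throw, so the macro now expands to ET_HAS_EXCEPTIONS rather than being merely defined: the gated includes evaluate to 0 in no-exceptions builds. An exceptions-detection macro is typically defined along these lines (illustrative; the real ET_HAS_EXCEPTIONS definition lives elsewhere in the tree and may differ):

    // Illustrative definition of an exceptions-detection macro.
    #if defined(__cpp_exceptions) || defined(__EXCEPTIONS) || \
        (defined(_MSC_VER) && defined(_CPPUNWIND))
    #define ET_HAS_EXCEPTIONS 1
    #else
    #define ET_HAS_EXCEPTIONS 0
    #endif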
11 changes: 9 additions & 2 deletions kernels/portable/CMakeLists.txt
@@ -68,9 +68,16 @@ if(EXECUTORCH_BUILD_PTHREADPOOL AND EXECUTORCH_BUILD_KERNELS_OPTIMIZED)
   target_link_libraries(optimized_portable_kernels PUBLIC extension_threadpool)
   target_compile_options(optimized_portable_kernels PUBLIC ${_common_compile_options})
   target_include_directories(optimized_portable_kernels PRIVATE ${TORCH_INCLUDE_DIRS})
-  target_compile_definitions(optimized_portable_kernels PRIVATE ET_USE_PYTORCH_HEADERS)
+  target_compile_definitions(optimized_portable_kernels PRIVATE "ET_USE_PYTORCH_HEADERS=ET_HAS_EXCEPTIONS")
+  gen_selected_ops(LIB_NAME "optimized_portable_ops_lib" OPS_SCHEMA_YAML "${_yaml}")
+  generate_bindings_for_kernels(
+    LIB_NAME "optimized_portable_ops_lib" FUNCTIONS_YAML "${_yaml}"
+  )
+  gen_operators_lib(
+    LIB_NAME "optimized_portable_ops_lib" KERNEL_LIBS optimized_portable_kernels DEPS executorch_core
+  )
   install(
-    TARGETS optimized_portable_kernels
+    TARGETS optimized_portable_kernels optimized_portable_ops_lib
     DESTINATION lib
   )
 endif()

Contributor comment on the install() change: Is optimized_portable_ops_lib mutually exclusive with portable_ops_lib? If so, should we build only one of them, depending on EXECUTORCH_BUILD_KERNELS_OPTIMIZED?

cc @larryliu0820
12 changes: 8 additions & 4 deletions kernels/portable/cpu/op_add.cpp
@@ -102,14 +102,18 @@ Tensor& add_scalar_out(
   static constexpr const char op_name[] = "add.Scalar_out";

   ET_SWITCH_REALB_TYPES(compute_type, ctx, op_name, CTYPE_COMPUTE, [&]() {
+    CTYPE_COMPUTE val_b = utils::scalar_to<CTYPE_COMPUTE>(b);
+    CTYPE_COMPUTE val_alpha = utils::scalar_to<CTYPE_COMPUTE>(alpha);
+    auto val_alpha_times_b = val_alpha * val_b;
     utils::apply_unitensor_elementwise_fn<
         CTYPE_COMPUTE,
         op_name,
         utils::SupportedTensorDtypes::SAME_AS_COMMON>(
-        [b, alpha](const auto val_a) {
-          CTYPE_COMPUTE val_b = utils::scalar_to<CTYPE_COMPUTE>(b);
-          CTYPE_COMPUTE val_alpha = utils::scalar_to<CTYPE_COMPUTE>(alpha);
-          return val_a + val_alpha * val_b;
+        [val_alpha_times_b](const auto val_a) {
+          // Cast here supports vectorization; either it does nothing
+          // or it casts from CTYPE_COMPUTE to
+          // Vectorized<CTYPE_COMPUTE>.
+          return val_a + decltype(val_a)(val_alpha_times_b);
         },
         ctx,
         a,
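The cast works because the elementwise framework may instantiate the lambda with either a scalar CTYPE_COMPUTE or an at::vec::Vectorized<CTYPE_COMPUTE>. A minimal sketch of the two instantiations, where demo() is a hypothetical stand-in for apply_unitensor_elementwise_fn:

    #include <ATen/cpu/vec/vec.h>

    // Calls op once with a scalar and once with a SIMD vector: the two
    // shapes the generic lambda must compile against.
    template <typename Op>
    void demo(Op op) {
      float scalar_result = op(2.0f);
      at::vec::Vectorized<float> simd_result =
          op(at::vec::Vectorized<float>(2.0f));
      (void)scalar_result;
      (void)simd_result;
    }

    void example() {
      float val_alpha_times_b = 3.0f;
      demo([val_alpha_times_b](const auto val_a) {
        // decltype(val_a)(x) is a no-op for float and a broadcast across
        // all lanes for Vectorized<float>, so operator+ sees matching
        // operand types either way.
        return val_a + decltype(val_a)(val_alpha_times_b);
      });
    }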
2 changes: 1 addition & 1 deletion kernels/portable/cpu/op_atan2.cpp
@@ -60,7 +60,7 @@ Tensor& atan2_out(
       op_name,
       utils::SupportedTensorDtypes::FLOATHBF16>(
       [](const auto val_a, const auto val_b) {
-        return std::atan2(val_a, val_b);
+        return executorch::math::atan2(val_a, val_b);
       },
       ctx,
       a,
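std::atan2 only accepts scalars, so it could never serve the vectorized path; the executorch::math wrappers (from vectorized_math.h, one of the lintrunner exclusions above) presumably provide an overload set of roughly this shape, sketched under that assumption:

    #include <cmath>
    #include <ATen/cpu/vec/vec.h>

    namespace sketch_math {
    // Scalar path: defer to <cmath>.
    template <typename T>
    T atan2(T a, T b) {
      return std::atan2(a, b);
    }
    // SIMD path: lane-wise atan2 via ATen's Vectorized.
    template <typename T>
    at::vec::Vectorized<T> atan2(
        at::vec::Vectorized<T> a, at::vec::Vectorized<T> b) {
      return a.atan2(b);
    }
    } // namespace sketch_math

With both overloads present, the same generic lambda compiles whether the framework passes scalars or vectors.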
5 changes: 2 additions & 3 deletions kernels/portable/cpu/op_clamp.cpp
@@ -138,9 +138,8 @@ Tensor& clamp_out(
       CTYPE_COMPUTE,
       op_name,
       utils::SupportedTensorDtypes::SAME_AS_COMMON>(
-      [has_min, min_opt, has_max, max_opt](const CTYPE_COMPUTE val_in) {
-        // TODO: rewrite this to be vectorization-capable.
-        CTYPE_COMPUTE val_out = val_in;
+      [has_min, min_opt, has_max, max_opt](const auto val_in) {
+        auto val_out = val_in;
         if (has_min) {
           val_out = utils::max_override(
               val_out, utils::scalar_to<CTYPE_COMPUTE>(min_opt.value()));
3 changes: 1 addition & 2 deletions kernels/portable/cpu/op_elu.cpp
@@ -48,8 +48,7 @@ Tensor& elu_out(
       CTYPE,
       op_name,
       utils::SupportedTensorDtypes::SAME_AS_COMMON>(
-      [negcoef, math_scale, math_input_scale](const auto x) {
-        // TODO: rewrite this to be vectorization-capable.
+      [negcoef, math_scale, math_input_scale](const CTYPE x) {
         return MathT(x) <= MathT(0)
             ? std::expm1(MathT(x) * math_input_scale) * negcoef
             : MathT(x) * math_scale;
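Note this change runs opposite to its neighbors: the parameter goes from auto back to CTYPE. std::expm1 has no Vectorized overload, so a generic lambda could not compile under a SIMD instantiation; pinning the parameter type keeps this kernel on the scalar path. A sketch of the scalar body, under that reading:

    #include <cmath>

    // Scalar-only ELU (illustrative). A CTYPE-typed lambda parameter means
    // the elementwise framework never attempts to pass Vectorized<CTYPE>,
    // for which std::expm1 would fail to compile.
    float elu_scalar(
        float x, float negcoef, float math_scale, float math_input_scale) {
      return x <= 0.0f ? std::expm1(x * math_input_scale) * negcoef
                       : x * math_scale;
    }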
8 changes: 3 additions & 5 deletions kernels/portable/cpu/op_fmod.cpp
@@ -61,7 +61,7 @@ Tensor& fmod_Tensor_out(
       utils::SupportedTensorDtypes::REALHBF16>(
       [&div_by_zero_error](
           const CTYPE_COMPUTE val_a, const CTYPE_COMPUTE val_b) {
-        // TODO: rewrite this to be vectorization-capable.
+        // TODO: rewrite this to be vectorization-capable?
         CTYPE_COMPUTE value = 0;
         if (is_integral_type<CTYPE_COMPUTE, /*includeBool=*/true>::value) {
           if (val_b == 0) {
@@ -138,10 +138,8 @@ Tensor& fmod_Scalar_out(
       CTYPE_COMPUTE,
       op_name,
       utils::SupportedTensorDtypes::REALHBF16>(
-      [val_b](const CTYPE_COMPUTE val_a) {
-        // TODO: rewrite this to be vectorization-capable.
-        CTYPE_COMPUTE value = std::fmod(val_a, val_b);
-        return value;
+      [val_b](const auto val_a) {
+        return executorch::math::fmod(val_a, (decltype(val_a))val_b);
       },
       ctx,
       a,
2 changes: 1 addition & 1 deletion kernels/portable/cpu/op_maximum.cpp
@@ -49,7 +49,7 @@ Tensor& maximum_out(
       CTYPE_COMPUTE,
       op_name,
       utils::SupportedTensorDtypes::REALHBBF16>(
-      [](const CTYPE_COMPUTE val_a, const CTYPE_COMPUTE val_b) {
+      [](const auto val_a, const auto val_b) {
         return utils::max_override(val_a, val_b);
       },
       ctx,
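utils::max_override (declared in math_util.h, another of the lintrunner exclusions) differs from std::max in that it propagates NaN, matching PyTorch's maximum semantics; for floating-point types it plausibly behaves like this sketch (illustrative, not the real implementation):

    #include <cmath>

    // NaN-propagating max for floating-point types.
    template <typename T>
    T max_override_sketch(T a, T b) {
      if (std::isnan(a)) {
        return a;
      }
      if (std::isnan(b)) {
        return b;
      }
      return a > b ? a : b;
    }

Making the lambda generic (auto parameters) lets the framework also call max_override with Vectorized arguments, assuming the header provides a matching overload.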
3 changes: 1 addition & 2 deletions kernels/portable/cpu/op_minimum.cpp
@@ -49,8 +49,7 @@ Tensor& minimum_out(
       CTYPE_COMPUTE,
       op_name,
       utils::SupportedTensorDtypes::REALHBBF16>(
-      [](const CTYPE_COMPUTE val_a, const CTYPE_COMPUTE val_b) {
-        // TODO: rewrite this to be vectorization-capable.
+      [](const auto val_a, const auto val_b) {
         return utils::min_override(val_a, val_b);
       },
       ctx,
4 changes: 1 addition & 3 deletions kernels/portable/cpu/op_mul.cpp
@@ -72,9 +72,7 @@ Tensor& mul_out(
       CTYPE_COMPUTE,
       op_name,
       utils::SupportedTensorDtypes::REALHBBF16>(
-      [](const CTYPE_COMPUTE val_a, const CTYPE_COMPUTE val_b) {
-        return val_a * val_b;
-      },
+      [](const auto val_a, const auto val_b) { return val_a * val_b; },
       ctx,
       a,
       utils::SupportedTensorDtypes::REALHBBF16,
10 changes: 6 additions & 4 deletions kernels/portable/cpu/op_native_dropout.cpp
@@ -57,8 +57,11 @@ std::tuple<Tensor&, Tensor&> native_dropout_out(
   }
   ET_SWITCH_FLOATHBF16_TYPES(
       input.scalar_type(), ctx, op_name, CTYPE_COMPUTE, [&]() {
-        utils::apply_bitensor_elementwise_fn<CTYPE_COMPUTE, op_name>(
-            [](const auto val, const auto mask_val) {
+        utils::apply_bitensor_elementwise_fn<
+            CTYPE_COMPUTE,
+            op_name,
+            utils::SupportedTensorDtypes::SAME_AS_COMMON>(
+            [](const CTYPE_COMPUTE val, const CTYPE_COMPUTE mask_val) {
               if (!mask_val) {
                 return static_cast<decltype(val)>(0);
               }
@@ -70,8 +73,7 @@ std::tuple<Tensor&, Tensor&> native_dropout_out(
             mask,
             // TODO: should really be just BOOL
             utils::SupportedTensorDtypes::BOOL_OR_BYTE,
-            out,
-            utils::SupportedTensorDtypes::SAME_AS_COMMON);
+            out);
       });
   } else if (input.numel() > 0) {
     std::memcpy(out.mutable_data_ptr(), input.data_ptr(), input.nbytes());
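Here the lambda deliberately stays scalar-typed: its body branches on mask_val per element, and a Vectorized operand has no boolean conversion to branch on, so a generic (auto) form could not be instantiated for SIMD. The move of SAME_AS_COMMON into the template parameter list matches the updated apply_bitensor_elementwise_fn signature used in the other files. A sketch of why the branch forces scalars:

    // Illustrative scalar dropout body. The !mask_val test requires a
    // scalar; at::vec::Vectorized<T> offers no conversion to bool, so this
    // body cannot compile with SIMD parameter types.
    template <typename T>
    T dropout_scalar(T val, T mask_val) {
      if (!mask_val) {
        return static_cast<T>(0);
      }
      return val; // pass through surviving elements (the real kernel's
                  // remaining lines are elided in this diff)
    }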
23 changes: 16 additions & 7 deletions kernels/portable/cpu/op_pow.cpp
@@ -57,9 +57,8 @@ Tensor& pow_Tensor_Tensor_out(
       CTYPE_COMPUTE,
       op_name,
       utils::SupportedTensorDtypes::REALHBF16>(
-      [](const CTYPE_COMPUTE val_a, const CTYPE_COMPUTE val_b) {
-        // TODO: rewrite this to be vectorization-capable.
-        return std::pow(val_a, val_b);
+      [](const auto val_a, const auto val_b) {
+        return executorch::math::pow(val_a, val_b);
       },
       ctx,
       a,
@@ -111,8 +110,13 @@ Tensor& pow_Tensor_Scalar_out(
       CTYPE_COMPUTE,
       op_name,
       utils::SupportedTensorDtypes::REALHBF16>(
-      // TODO: rewrite this to be vectorization-capable.
-      [val_b](const CTYPE_COMPUTE val_a) { return std::pow(val_a, val_b); },
+      // Casting val_b here supports vectorization; it does
+      // nothing if we are not vectorizing (casts to
+      // CTYPE_COMPUTE) and casts to a vectorized type
+      // otherwise.
+      [val_b](const auto val_a) {
+        return executorch::math::pow(val_a, decltype(val_a)(val_b));
+      },
       ctx,
       a,
       utils::SupportedTensorDtypes::REALHBBF16,
@@ -161,8 +165,13 @@ Tensor& pow_Scalar_out(
       CTYPE_COMPUTE,
       op_name,
       utils::SupportedTensorDtypes::REALHBF16>(
-      // TODO: rewrite this to be vectorization-capable.
-      [val_a](const CTYPE_COMPUTE val_b) { return std::pow(val_a, val_b); },
+      // Casting val_a here supports vectorization; it does
+      // nothing if we are not vectorizing (casts to
+      // CTYPE_COMPUTE) and casts to a vectorized type
+      // otherwise.
+      [val_a](const auto val_b) {
+        return executorch::math::pow(decltype(val_b)(val_a), val_b);
+      },
       ctx,
       b,
       utils::SupportedTensorDtypes::REALHBBF16,
7 changes: 3 additions & 4 deletions kernels/portable/cpu/op_sigmoid.cpp
@@ -49,10 +49,9 @@ Tensor& sigmoid_out(KernelRuntimeContext& ctx, const Tensor& in, Tensor& out) {
       CTYPE_COMPUTE,
       op_name,
       utils::SupportedTensorDtypes::FLOATHBF16>(
-      [](const auto val_in) -> CTYPE_COMPUTE {
-        // TODO: rewrite this to be vectorization-capable
-        CTYPE_COMPUTE out_val = static_cast<CTYPE_COMPUTE>(1.0) /
-            (static_cast<CTYPE_COMPUTE>(1.0) + exp(-val_in));
+      [](const auto val_in) {
+        const auto one = static_cast<decltype(val_in)>(1.0);
+        auto out_val = one / (one + executorch::math::exp(-val_in));
         return out_val;
       },
       ctx,
7 changes: 4 additions & 3 deletions kernels/portable/cpu/op_sub.cpp
@@ -61,7 +61,7 @@ Tensor& sub_out(
       op_name,
       utils::SupportedTensorDtypes::REALHBF16>(
       [val_alpha](const auto val_a, const auto val_b) {
-        return val_a - val_alpha * val_b;
+        return val_a - (decltype(val_b))(val_alpha)*val_b;
       },
       ctx,
       a,
@@ -112,12 +112,13 @@ Tensor& sub_scalar_out(
   ET_SWITCH_REAL_TYPES(compute_type, ctx, op_name, CTYPE_COMPUTE, [&]() {
     const CTYPE_COMPUTE val_b = utils::scalar_to<CTYPE_COMPUTE>(b);
     const CTYPE_COMPUTE val_alpha = utils::scalar_to<CTYPE_COMPUTE>(alpha);
+    const auto val_alpha_times_b = val_alpha * val_b;
     utils::apply_unitensor_elementwise_fn<
         CTYPE_COMPUTE,
         op_name,
         utils::SupportedTensorDtypes::SAME_AS_COMMON>(
-        [val_b, val_alpha](const auto val_a) {
-          return val_a - val_alpha * val_b;
+        [val_alpha_times_b](const auto val_a) {
+          return val_a - (decltype(val_a))(val_alpha_times_b);
         },
         ctx,
         a,
6 changes: 3 additions & 3 deletions kernels/portable/cpu/op_where.cpp
@@ -47,9 +47,9 @@ Tensor& where_out(
       CTYPE_COMPUTE,
       op_name,
       utils::SupportedTensorDtypes::SAME_AS_COMMON>(
-      [](const auto val_a, const auto val_b, const auto val_c) {
-        return val_c ? val_a : val_b;
-      },
+      [](const CTYPE_COMPUTE val_a,
+         const CTYPE_COMPUTE val_b,
+         const CTYPE_COMPUTE val_c) { return val_c ? val_a : val_b; },
       ctx,
       a,
       utils::SupportedTensorDtypes::REALHBBF16,
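where_out likewise reverts to explicitly typed parameters: the ternary needs a scalar boolean condition, so this lambda cannot be instantiated with Vectorized operands. For reference, the SIMD analogue of val_c ? val_a : val_b would be an explicit lane-wise blend, roughly as follows (a sketch using ATen's Vectorized, not code from this PR):

    #include <ATen/cpu/vec/vec.h>

    using Vec = at::vec::Vectorized<float>;

    // Selects lanes of a where the mask bits are set, lanes of b elsewhere.
    Vec where_vec(Vec mask, Vec a, Vec b) {
      return Vec::blendv(b, a, mask);
    }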