Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 2 additions & 23 deletions kernels/optimized/cpu/op_add.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
#include <ATen/cpu/vec/functional.h>
#include <ATen/cpu/vec/vec.h>
#include <executorch/kernels/optimized/cpu/binary_ops.h>
#include <executorch/kernels/portable/cpu/op_add.h>
#include <executorch/kernels/portable/cpu/scalar_utils.h>
#include <executorch/kernels/portable/cpu/util/broadcast_util.h>
#include <executorch/runtime/kernel/kernel_includes.h>
Expand Down Expand Up @@ -137,29 +138,7 @@ Tensor& opt_add_scalar_out(
out.numel());
});
} else {
ET_SWITCH_REALHBBF16_TYPES(a_type, ctx, "add.Scalar_out", CTYPE_A, [&]() {
ET_SWITCH_REALB_TYPES(
common_type, ctx, "add.Scalar_out", CTYPE_IN, [&]() {
ET_SWITCH_REALHBBF16_TYPES(
out_type, ctx, "add.Scalar_out", CTYPE_OUT, [&]() {
CTYPE_IN b_casted = utils::scalar_to<CTYPE_IN>(b);
CTYPE_IN alpha_val;
ET_KERNEL_CHECK(
ctx,
utils::extract_scalar(alpha, &alpha_val),
InvalidArgument, );

const size_t n = a.numel();
const CTYPE_A* a_data = a.const_data_ptr<CTYPE_A>();
CTYPE_OUT* out_data = out.mutable_data_ptr<CTYPE_OUT>();
for (auto i = 0; i < n; ++i) {
out_data[i] = static_cast<CTYPE_OUT>(
static_cast<CTYPE_IN>(a_data[i]) +
alpha_val * b_casted);
}
});
});
});
utils::add_scalar_out(ctx, a, b, alpha, out);
}

return out;
Expand Down
90 changes: 7 additions & 83 deletions kernels/optimized/cpu/op_add_sub_impl.h
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,8 @@
#include <ATen/cpu/vec/functional.h>
#include <ATen/cpu/vec/vec.h>
#include <executorch/kernels/optimized/cpu/binary_ops.h>
#include <executorch/kernels/portable/cpu/op_add.h>
#include <executorch/kernels/portable/cpu/op_sub.h>
#include <executorch/kernels/portable/cpu/scalar_utils.h>
#include <executorch/kernels/portable/cpu/util/broadcast_util.h>
#include <executorch/runtime/kernel/kernel_includes.h>
Expand All @@ -19,55 +21,6 @@ namespace executor {
namespace kernels {
namespace impl {

namespace {
// Primary template, declared only. The leading bool picks one of the two
// partial specializations below: `true` runs the element-wise kernel,
// `false` inherits the bug-reporting stub.
template <
    bool can_cast,
    typename CTYPE_A,
    typename CTYPE_B,
    typename CTYPE_IN,
    typename CTYPE_OUT>
struct AddInner;

// can_cast == true: writes `a + alpha_val * b` into `out`, element by
// element, via apply_binary_elementwise_fn. Each operand is converted to
// CTYPE_IN first, the result is materialized as CTYPE_IN, and only then
// converted to CTYPE_OUT.
template <
    typename CTYPE_A,
    typename CTYPE_B,
    typename CTYPE_IN,
    typename CTYPE_OUT>
struct AddInner<true, CTYPE_A, CTYPE_B, CTYPE_IN, CTYPE_OUT> {
  static void
  run(const Tensor& a, const Tensor& b, CTYPE_IN alpha_val, Tensor& out) {
    // NOLINTNEXTLINE(facebook-hte-ConstantArgumentPassByValue)
    const auto add_scaled = [alpha_val](const CTYPE_A lhs, const CTYPE_B rhs) {
      // Keep the intermediate typed as CTYPE_IN so the result passes through
      // the compute type before the final conversion to the output type.
      const CTYPE_IN sum =
          static_cast<CTYPE_IN>(lhs) + alpha_val * static_cast<CTYPE_IN>(rhs);
      return static_cast<CTYPE_OUT>(sum);
    };
    apply_binary_elementwise_fn<CTYPE_A, CTYPE_B, CTYPE_OUT>(
        add_scaled, a, b, out);
  }
};

// Stub with the same `run` signature as the real kernel. It ignores all of
// its arguments and only trips ET_DCHECK_MSG with a fixed message.
template <typename CTYPE_IN>
struct ReportCanCastBug {
  static void run(const Tensor&, const Tensor&, CTYPE_IN, Tensor&) {
    ET_DCHECK_MSG(false, "BUG: canCast should have been checked above");
  }
};

// can_cast == false: no arithmetic; `run` comes from ReportCanCastBug.
template <
    typename CTYPE_A,
    typename CTYPE_B,
    typename CTYPE_IN,
    typename CTYPE_OUT>
struct AddInner<false, CTYPE_A, CTYPE_B, CTYPE_IN, CTYPE_OUT>
    : public ReportCanCastBug<CTYPE_IN> {};

} // namespace

using Tensor = executorch::aten::Tensor;
using ScalarType = executorch::aten::ScalarType;

Expand Down Expand Up @@ -203,40 +156,11 @@ Tensor& opt_add_sub_out_impl(
}
});
} else {
ScalarType common_type =
promoteTypes(a_type, b_type, /*half_to_float*/ true);
ET_KERNEL_CHECK(ctx, canCast(common_type, out_type), InvalidArgument, out);

ET_KERNEL_CHECK(
ctx,
resize_to_broadcast_target_size(a, b, out) == Error::Ok,
InvalidArgument,
out);

ET_SWITCH_REALHBBF16_TYPES(a_type, ctx, op_name, CTYPE_A, [&]() {
ET_SWITCH_REALHBBF16_TYPES(b_type, ctx, op_name, CTYPE_B, [&]() {
using CTYPE_IN = typename torch::executor::
promote_types<CTYPE_A, CTYPE_B, /*half_to_float*/ true>::type;
ET_DCHECK(CppTypeToScalarType<CTYPE_IN>::value == common_type);
ET_SWITCH_REALHBBF16_TYPES(out_type, ctx, op_name, CTYPE_OUT, [&]() {
CTYPE_IN alpha_val;
ET_KERNEL_CHECK(
ctx,
torch::executor::native::utils::extract_scalar(alpha, &alpha_val),
InvalidArgument, );
if constexpr (is_sub) {
alpha_val = -alpha_val;
}

AddInner<
can_cast<CTYPE_IN, CTYPE_OUT>::value,
CTYPE_A,
CTYPE_B,
CTYPE_IN,
CTYPE_OUT>::run(a, b, alpha_val, out);
});
});
});
if constexpr (is_sub) {
native::utils::sub_out(ctx, a, b, alpha, out);
} else {
native::utils::add_out(ctx, a, b, alpha, out);
}
}

return out;
Expand Down
77 changes: 3 additions & 74 deletions kernels/optimized/cpu/op_div.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
#include <ATen/cpu/vec/functional.h>
#include <ATen/cpu/vec/vec.h>
#include <executorch/kernels/optimized/cpu/binary_ops.h>
#include <executorch/kernels/portable/cpu/op_div.h>
#include <executorch/kernels/portable/cpu/scalar_utils.h>
#include <executorch/kernels/portable/cpu/util/broadcast_util.h>
#include <executorch/runtime/kernel/kernel_includes.h>
Expand All @@ -18,26 +19,6 @@ namespace torch {
namespace executor {
namespace native {

namespace {

// Picks the ScalarType in which a division of `a_type` by `b_type` is
// computed.
//
// Both inputs are first checked (ET_CHECK) to be neither complex, quantized
// integer, nor bits types. After that:
//   - both floating  -> promoteTypes(a_type, b_type)
//   - only a floating -> a_type
//   - only b floating -> b_type
//   - neither floating -> ScalarType::Float
ScalarType get_compute_type(ScalarType a_type, ScalarType b_type) {
  ET_CHECK(
      !isComplexType(a_type) && !isQIntType(a_type) && !isBitsType(a_type));
  ET_CHECK(
      !isComplexType(b_type) && !isQIntType(b_type) && !isBitsType(b_type));

  const bool a_is_floating = isFloatingType(a_type);
  const bool b_is_floating = isFloatingType(b_type);

  if (a_is_floating) {
    // With a floating on both sides the two types are promoted; otherwise
    // the single floating operand decides.
    return b_is_floating ? promoteTypes(a_type, b_type) : a_type;
  }
  if (b_is_floating) {
    return b_type;
  }
  // Neither operand is floating point.
  return ScalarType::Float;
}

} // namespace

Tensor& opt_div_out(
KernelRuntimeContext& ctx,
const Tensor& a,
Expand Down Expand Up @@ -139,34 +120,7 @@ Tensor& opt_div_out(
}
});
} else {
ScalarType common_type = get_compute_type(a_type, b_type);
ET_KERNEL_CHECK(ctx, canCast(common_type, out_type), InvalidArgument, out);

ET_KERNEL_CHECK(
ctx,
resize_to_broadcast_target_size(a, b, out) == Error::Ok,
InvalidArgument,
out);

ET_SWITCH_REALB_TYPES(a_type, ctx, "div.out", CTYPE_A, [&]() {
ET_SWITCH_REALB_TYPES(b_type, ctx, "div.out", CTYPE_B, [&]() {
ET_SWITCH_REALB_TYPES(common_type, ctx, "div.out", CTYPE_IN, [&]() {
ET_SWITCH_REALB_TYPES(out_type, ctx, "div.out", CTYPE_OUT, [&]() {
apply_binary_elementwise_fn<CTYPE_A, CTYPE_B, CTYPE_OUT>(
[](const CTYPE_A val_a, const CTYPE_B val_b) {
CTYPE_IN a_casted = static_cast<CTYPE_IN>(val_a);
CTYPE_IN b_casted = static_cast<CTYPE_IN>(val_b);
CTYPE_IN value = a_casted / b_casted;

return static_cast<CTYPE_OUT>(value);
},
a,
b,
out);
});
});
});
});
utils::div_out(ctx, a, b, out);
}

return out;
Expand Down Expand Up @@ -208,32 +162,7 @@ Tensor& opt_div_scalar_out(
});
});
} else {
ET_SWITCH_REAL_TYPES_AND(
Bool, a_type, ctx, "div.Scalar_out", CTYPE_A, [&]() {
ET_SWITCH_REAL_TYPES_AND(
Bool, b_type, ctx, "div.Scalar_out", CTYPE_B, [&]() {
ET_SWITCH_REAL_TYPES(
common_type, ctx, "div.Scalar_out", CTYPE_IN, [&]() {
ET_SWITCH_REAL_TYPES(
out_type, ctx, "div.Scalar_out", CTYPE_OUT, [&]() {
CTYPE_B b_val;
ET_EXTRACT_SCALAR(b, b_val);
CTYPE_IN b_casted = static_cast<CTYPE_IN>(b_val);
CTYPE_IN inv_b_casted = CTYPE_IN(1) / b_casted;

const size_t n = a.numel();
const CTYPE_A* a_data = a.const_data_ptr<CTYPE_A>();
CTYPE_OUT* out_data =
out.mutable_data_ptr<CTYPE_OUT>();
for (auto i = 0; i < n; ++i) {
out_data[i] = static_cast<CTYPE_OUT>(
static_cast<CTYPE_IN>(a_data[i]) *
inv_b_casted);
}
});
});
});
});
utils::div_scalar_out(ctx, a, b, out);
}

return out;
Expand Down
36 changes: 3 additions & 33 deletions kernels/optimized/cpu/op_le.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
#include <ATen/cpu/vec/functional.h>
#include <ATen/cpu/vec/vec.h>
#include <executorch/kernels/optimized/cpu/binary_ops.h>
#include <executorch/kernels/portable/cpu/pattern/comparison_op.h>
#include <executorch/kernels/portable/cpu/op_le.h>
#include <executorch/kernels/portable/cpu/scalar_utils.h>
#include <executorch/kernels/portable/cpu/util/broadcast_util.h>
#include <executorch/runtime/kernel/kernel_includes.h>
Expand Down Expand Up @@ -61,10 +61,7 @@ Tensor& opt_le_tensor_out(
ctx, le_lambda, a, b, out, selected_optimized_path);
});
} else {
// @lint-ignore CLANGTIDY facebook-hte-CArray
static constexpr const char op_name[] = "le.Tensor_out";
return internal::comparison_tensor_out<std::less_equal, op_name>(
ctx, a, b, out);
utils::le_tensor_out(ctx, a, b, out);
}

return out;
Expand Down Expand Up @@ -107,34 +104,7 @@ Tensor& opt_le_scalar_out(
});
});
} else {
ET_SWITCH_REAL_TYPES_AND(
Bool, a_type, ctx, "le.Scalar_out", CTYPE_A, [&]() {
ET_SWITCH_REAL_TYPES_AND(
Bool, b_type, ctx, "le.Scalar_out", CTYPE_B, [&]() {
ET_SWITCH_REAL_TYPES_AND(
Bool, common_type, ctx, "le.Scalar_out", CTYPE_IN, [&]() {
ET_SWITCH_REAL_TYPES_AND(
Bool,
out_type,
ctx,
"le.Scalar_out",
CTYPE_OUT,
[&]() {
CTYPE_B b_val = 0;
ET_EXTRACT_SCALAR(b, b_val);
CTYPE_IN b_casted = static_cast<CTYPE_IN>(b_val);
const size_t n = a.numel();
const CTYPE_A* a_data = a.const_data_ptr<CTYPE_A>();
CTYPE_OUT* out_data =
out.mutable_data_ptr<CTYPE_OUT>();
for (auto i = 0; i < n; ++i) {
out_data[i] = static_cast<CTYPE_OUT>(
static_cast<CTYPE_IN>(a_data[i]) <= b_casted);
}
});
});
});
});
utils::le_scalar_out(ctx, a, b, out);
}

return out;
Expand Down
Loading
Loading