Merged
backends/cadence/fusion_g3/operators/op_add.cpp (124 changes: 99 additions & 25 deletions)
@@ -6,25 +6,37 @@
  * LICENSE file in the root directory of this source tree.
  */
 
+#include <executorch/backends/cadence/fusion_g3/operators/operators.h>
+
+#include <xa_nnlib_kernels_api.h>
+
 #include <executorch/kernels/portable/cpu/scalar_utils.h>
 #include <executorch/kernels/portable/cpu/util/elementwise_util.h>
 #include <executorch/kernels/portable/cpu/util/kernel_ops_util.h>
 #include <executorch/runtime/kernel/kernel_includes.h>
 #include <executorch/runtime/platform/assert.h>
-#include <xa_nnlib_kernels_api.h>
 
-using exec_aten::Scalar;
-using exec_aten::ScalarType;
-using exec_aten::Tensor;
-using executorch::runtime::canCast;
-using torch::executor::Error;
-using torch::executor::KernelRuntimeContext;
+using ::executorch::aten::Scalar;
+using ::executorch::aten::ScalarType;
+using ::executorch::aten::Tensor;
+using ::executorch::runtime::canCast;
+using ::executorch::runtime::Error;
+using ::executorch::runtime::KernelRuntimeContext;
 
 namespace cadence {
 namespace impl {
 namespace G3 {
 namespace native {
 
+#define XT_KERNEL_CHECK(ctx, out, kernel, ...) \
+  const auto ret = kernel(__VA_ARGS__);        \
+  ET_KERNEL_CHECK_MSG(                         \
+      ctx,                                     \
+      ret == 0,                                \
+      InvalidArgument,                         \
+      out,                                     \
+      "Failed to run kernel: " #kernel "(" #__VA_ARGS__ ")");
+
 Tensor& add_out(
     KernelRuntimeContext& ctx,
     const Tensor& a,
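The new XT_KERNEL_CHECK macro is the substance of this diff: every raw NNLib call below now has its integer status code checked, and a non-zero code aborts the op through ET_KERNEL_CHECK_MSG with a message naming the kernel and its arguments. A hand-expanded sketch of one invocation, with abbreviated argument names:

// What XT_KERNEL_CHECK(ctx, out, xa_nn_elm_add_32x32_32,
//                      out_data, a_data, b_data, alpha, n) expands to:
const auto ret = xa_nn_elm_add_32x32_32(out_data, a_data, b_data, alpha, n);
ET_KERNEL_CHECK_MSG(
    ctx,
    ret == 0,          // the macro treats a zero return code as success
    InvalidArgument,   // error reported to the kernel runtime context
    out,               // value returned from the enclosing op on failure
    "Failed to run kernel: "
    "xa_nn_elm_add_32x32_32(out_data, a_data, b_data, alpha, n)");

One design note: the body is not wrapped in do { ... } while (0) and declares ret in the enclosing scope, so two expansions in the same block would redeclare ret. Every use in this patch sits in its own if/else block, which is what keeps that from biting.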
@@ -121,13 +133,30 @@ Tensor& add_out(
     torch::executor::native::utils::extract_scalar(alpha, &alpha_val);
 
     if ((a.numel() == 1) && (alpha_val == 1)) {
-      xa_nn_elm_add_scalar_32x32_32(
-          out_data, inp2_data, inp1_data[0], alpha_val, out.numel());
+      XT_KERNEL_CHECK(
+          ctx,
+          out,
+          xa_nn_elm_add_scalar_32x32_32,
+          out_data,
+          inp2_data,
+          inp1_data[0],
+          alpha_val,
+          out.numel());
     } else if (b.numel() == 1) {
-      xa_nn_elm_add_scalar_32x32_32(
-          out_data, inp1_data, inp2_data[0], alpha_val, out.numel());
+      XT_KERNEL_CHECK(
+          ctx,
+          out,
+          xa_nn_elm_add_scalar_32x32_32,
+          out_data,
+          inp1_data,
+          inp2_data[0],
+          alpha_val,
+          out.numel());
     } else if (broadcast) {
-      xa_nn_elm_add_broadcast_5D_32x32_32(
+      XT_KERNEL_CHECK(
+          ctx,
+          out,
+          xa_nn_elm_add_broadcast_5D_32x32_32,
           out_data,
           out_shape,
           inp1_data,
@@ -137,8 +166,15 @@ Tensor& add_out(
           max_dim,
           alpha_val);
     } else {
-      xa_nn_elm_add_32x32_32(
-          out_data, inp1_data, inp2_data, alpha_val, out.numel());
+      XT_KERNEL_CHECK(
+          ctx,
+          out,
+          xa_nn_elm_add_32x32_32,
+          out_data,
+          inp1_data,
+          inp2_data,
+          alpha_val,
+          out.numel());
     }
   } else if ((compute_type == ScalarType::Float) && (optimized)) {
     const float* const inp1_data = a.const_data_ptr<float>();
@@ -149,13 +185,30 @@ Tensor& add_out(
     torch::executor::native::utils::extract_scalar(alpha, &alpha_val);
 
     if ((a.numel() == 1) && (alpha_val == 1.0)) {
-      xa_nn_elm_add_scalar_f32xf32_f32(
-          out_data, inp2_data, inp1_data[0], alpha_val, out.numel());
+      XT_KERNEL_CHECK(
+          ctx,
+          out,
+          xa_nn_elm_add_scalar_f32xf32_f32,
+          out_data,
+          inp2_data,
+          inp1_data[0],
+          alpha_val,
+          out.numel());
     } else if (b.numel() == 1) {
-      xa_nn_elm_add_scalar_f32xf32_f32(
-          out_data, inp1_data, inp2_data[0], alpha_val, out.numel());
+      XT_KERNEL_CHECK(
+          ctx,
+          out,
+          xa_nn_elm_add_scalar_f32xf32_f32,
+          out_data,
+          inp1_data,
+          inp2_data[0],
+          alpha_val,
+          out.numel());
     } else if (broadcast) {
-      xa_nn_elm_add_broadcast_5D_f32xf32_f32(
+      XT_KERNEL_CHECK(
+          ctx,
+          out,
+          xa_nn_elm_add_broadcast_5D_f32xf32_f32,
           out_data,
           out_shape,
           inp1_data,
@@ -165,8 +218,15 @@ Tensor& add_out(
           max_dim,
           alpha_val);
     } else {
-      xa_nn_elm_add_f32xf32_f32(
-          out_data, inp1_data, inp2_data, alpha_val, out.numel());
+      XT_KERNEL_CHECK(
+          ctx,
+          out,
+          xa_nn_elm_add_f32xf32_f32,
+          out_data,
+          inp1_data,
+          inp2_data,
+          alpha_val,
+          out.numel());
     }
   } else {
     ET_SWITCH_REALB_TYPES(compute_type, ctx, op_name, CTYPE_COMPUTE, [&]() {
@@ -242,8 +302,15 @@ Tensor& add_scalar_out(
 
     int* const out_data = out.mutable_data_ptr<int>();
 
-    xa_nn_elm_add_scalar_32x32_32(
-        out_data, inp1_data, inp2_val, alpha_val, out.numel());
+    XT_KERNEL_CHECK(
+        ctx,
+        out,
+        xa_nn_elm_add_scalar_32x32_32,
+        out_data,
+        inp1_data,
+        inp2_val,
+        alpha_val,
+        out.numel());
 
   } else if (compute_type == ScalarType::Float) {
     const float* const inp1_data = a.const_data_ptr<float>();
@@ -255,8 +322,15 @@ Tensor& add_scalar_out(
 
     float* const out_data = out.mutable_data_ptr<float>();
 
-    xa_nn_elm_add_scalar_f32xf32_f32(
-        out_data, inp1_data, inp2_val, alpha_val, out.numel());
+    XT_KERNEL_CHECK(
+        ctx,
+        out,
+        xa_nn_elm_add_scalar_f32xf32_f32,
+        out_data,
+        inp1_data,
+        inp2_val,
+        alpha_val,
+        out.numel());
 
   } else {
     ET_SWITCH_REALB_TYPES(compute_type, ctx, op_name, CTYPE_COMPUTE, [&]() {
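One detail of add_out's dispatch worth spelling out: the op computes out = a + alpha * b, and the scalar kernels fold a one-element tensor into a scalar argument. Folding b works for any alpha, but folding a requires swapping the operands, which is only sound when alpha == 1; hence the (a.numel() == 1) && (alpha_val == 1) guard on the first branch. A minimal sketch of the same dispatch shape (the broadcast branch is omitted; add_scalar and add_elementwise are hypothetical stand-ins, assuming the NNLib kernels compute out[i] = in[i] + alpha * scalar and out[i] = in1[i] + alpha * in2[i]):

#include <cstddef>

// Hypothetical stand-in for xa_nn_elm_add_scalar_32x32_32.
static int add_scalar(int* out, const int* in, int scalar, int alpha, size_t n) {
  for (size_t i = 0; i < n; ++i)
    out[i] = in[i] + alpha * scalar;
  return 0; // 0 == success, the convention XT_KERNEL_CHECK relies on
}

// Hypothetical stand-in for xa_nn_elm_add_32x32_32.
static int add_elementwise(
    int* out, const int* in1, const int* in2, int alpha, size_t n) {
  for (size_t i = 0; i < n; ++i)
    out[i] = in1[i] + alpha * in2[i];
  return 0;
}

int add_dispatch(
    int* out, const int* a, size_t a_n, const int* b, size_t b_n, int alpha) {
  if (a_n == 1 && alpha == 1) {
    // a[0] + 1 * b == b + 1 * a[0]: commuting the operands is valid
    // only because alpha == 1.
    return add_scalar(out, b, a[0], alpha, b_n);
  } else if (b_n == 1) {
    // a + alpha * b[0] needs no swap, so any alpha is fine.
    return add_scalar(out, a, b[0], alpha, a_n);
  }
  return add_elementwise(out, a, b, alpha, a_n); // same-shape case
}

The float path above and both branches of add_scalar_out follow the same shape; only the kernel symbol changes.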
backends/cadence/fusion_g3/operators/op_cat.cpp (17 changes: 9 additions & 8 deletions)
@@ -6,16 +6,17 @@
  * LICENSE file in the root directory of this source tree.
  */
 
+#include <cstring>
+
+#include <xa_nnlib_kernels_api.h>
+
 #include <executorch/kernels/portable/cpu/util/copy_ops_util.h>
 #include <executorch/runtime/kernel/kernel_includes.h>
-#include <xa_nnlib_kernels_api.h>
-#include <cstring>
 
-using exec_aten::Scalar;
-using exec_aten::ScalarType;
-using exec_aten::Tensor;
-using torch::executor::Error;
-using torch::executor::KernelRuntimeContext;
+using ::executorch::aten::ScalarType;
+using ::executorch::aten::Tensor;
+using ::executorch::runtime::Error;
+using ::executorch::runtime::KernelRuntimeContext;
 
 /* ScalarType in Executorch do not have support for below data types.
  * So, creating a placeholder for these data types. Once, ScalarTypes is
@@ -194,4 +195,4 @@ Tensor& cat_out(
 } // namespace native
 } // namespace G3
 } // namespace impl
-} // namespace cadence
\ No newline at end of file
+} // namespace cadence
backends/cadence/fusion_g3/operators/op_dequantize.cpp (24 changes: 13 additions & 11 deletions)
@@ -6,18 +6,20 @@
  * LICENSE file in the root directory of this source tree.
  */
 
-#include <executorch/kernels/portable/cpu/util/reduce_util.h>
-#include <executorch/runtime/kernel/kernel_includes.h>
-#include <xa_nnlib_kernels_api.h>
 #include <algorithm>
 #include <cinttypes>
 #include <cmath>
 
-using exec_aten::Scalar;
-using exec_aten::ScalarType;
-using exec_aten::Tensor;
-using torch::executor::Error;
-using torch::executor::KernelRuntimeContext;
+#include <xa_nnlib_kernels_api.h>
+
+#include <executorch/kernels/portable/cpu/util/reduce_util.h>
+#include <executorch/runtime/kernel/kernel_includes.h>
+
+using ::executorch::aten::Scalar;
+using ::executorch::aten::ScalarType;
+using ::executorch::aten::Tensor;
+using ::executorch::runtime::Error;
+using ::executorch::runtime::KernelRuntimeContext;
 
 template <typename T>
 using optional = exec_aten::optional<T>;
@@ -185,7 +187,7 @@ void dequantize_impl(
   if (axis == NULL) {
     // calculate the dequantized output, cast scale to float to match fbgemm
     // behavior
-#define ASYM_DEQUANTIZE_IMPL_TESNOR(IN_CTYPE, OUT_CTYPE, out_dtype) \
+#define ASYM_DEQUANTIZE_IMPL_TENSOR(IN_CTYPE, OUT_CTYPE, out_dtype) \
   case ScalarType::out_dtype: { \
     /* Hoist these function calls out of our inner loop because they might not \
      * get inlined without LTO, particularly in ATen mode. */ \
@@ -201,7 +203,7 @@ void dequantize_impl(
 #define ASYM_CALCULATE_INT_TYPE_TENSOR(IN_CTYPE, in_dtype) \
   case ScalarType::in_dtype: \
     switch (out.scalar_type()) { \
-      ET_FORALL_FLOAT_TYPES_WITH(IN_CTYPE, ASYM_DEQUANTIZE_IMPL_TESNOR); \
+      ET_FORALL_FLOAT_TYPES_WITH(IN_CTYPE, ASYM_DEQUANTIZE_IMPL_TENSOR); \
       default: \
         ET_CHECK_MSG( \
             false, \
@@ -219,7 +221,7 @@ void dequantize_impl(
           static_cast<int8_t>(input.scalar_type()));
   }
 #undef ASYM_CALCULATE_INT_TYPE_TENSOR
-#undef ASYM_DEQUANTIZE_IMPL_TESNOR
+#undef ASYM_DEQUANTIZE_IMPL_TENSOR
   } else {
     // a list contains all dimensions except axis
    int64_t dims[input.dim() - 1];
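Aside from renaming the misspelled ASYM_DEQUANTIZE_IMPL_TESNOR to ASYM_DEQUANTIZE_IMPL_TENSOR, these hunks show the dispatch pattern the file is built on: an outer switch over the input dtype whose cases are stamped out by one macro, each containing an inner switch over the output dtype stamped out by ET_FORALL_FLOAT_TYPES_WITH. A self-contained toy version of that nested X-macro dispatch (the enum, the FORALL macro, and the dequantize body are simplified stand-ins, not the ExecuTorch definitions):

#include <cstdio>

enum class ScalarType { Byte, Char, Float, Double };

// One case per (input ctype, output ctype) pair:
// dequantized = (quantized - zero_point) * scale.
#define DEQUANTIZE_IMPL_TENSOR(IN_CTYPE, OUT_CTYPE, out_dtype)   \
  case ScalarType::out_dtype: {                                  \
    OUT_CTYPE* out_p = static_cast<OUT_CTYPE*>(out);             \
    const IN_CTYPE* in_p = static_cast<const IN_CTYPE*>(in);     \
    for (int i = 0; i < n; ++i)                                  \
      out_p[i] = static_cast<OUT_CTYPE>((in_p[i] - zp) * scale); \
    break;                                                       \
  }

// Stamps out the inner switch over every supported float output type.
#define FORALL_FLOAT_TYPES_WITH(IN_CTYPE, M) \
  M(IN_CTYPE, float, Float)                  \
  M(IN_CTYPE, double, Double)

#define CALCULATE_INT_TYPE_TENSOR(IN_CTYPE, in_dtype)           \
  case ScalarType::in_dtype:                                    \
    switch (out_dtype) {                                        \
      FORALL_FLOAT_TYPES_WITH(IN_CTYPE, DEQUANTIZE_IMPL_TENSOR) \
      default:                                                  \
        std::printf("unsupported output dtype\n");              \
    }                                                           \
    break;

void dequantize(const void* in, ScalarType in_dtype, void* out,
                ScalarType out_dtype, float scale, int zp, int n) {
  switch (in_dtype) {
    CALCULATE_INT_TYPE_TENSOR(unsigned char, Byte)
    CALCULATE_INT_TYPE_TENSOR(signed char, Char)
    default:
      std::printf("unsupported input dtype\n");
  }
}

The rename touches the definition, the expansion site, and the #undef together because all three must stay consistent for the generated switch to compile.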
backends/cadence/fusion_g3/operators/op_mul.cpp (17 changes: 9 additions & 8 deletions)
@@ -6,18 +6,19 @@
  * LICENSE file in the root directory of this source tree.
  */
 
+#include <xa_nnlib_kernels_api.h>
+
 #include <executorch/kernels/portable/cpu/scalar_utils.h>
 #include <executorch/kernels/portable/cpu/util/elementwise_util.h>
 #include <executorch/runtime/kernel/kernel_includes.h>
 #include <executorch/runtime/platform/assert.h>
-#include <xa_nnlib_kernels_api.h>
 
-using exec_aten::Scalar;
-using exec_aten::ScalarType;
-using exec_aten::Tensor;
-using executorch::runtime::canCast;
-using torch::executor::Error;
-using torch::executor::KernelRuntimeContext;
+using ::executorch::aten::Scalar;
+using ::executorch::aten::ScalarType;
+using ::executorch::aten::Tensor;
+using ::executorch::runtime::canCast;
+using ::executorch::runtime::Error;
+using ::executorch::runtime::KernelRuntimeContext;
 
 namespace cadence {
 namespace impl {
@@ -238,4 +239,4 @@ Tensor& mul_scalar_out(
 } // namespace native
 } // namespace G3
 } // namespace impl
-} // namespace cadence
\ No newline at end of file
+} // namespace cadence
backends/cadence/fusion_g3/operators/op_native_layer_norm.cpp (20 changes: 11 additions & 9 deletions)
@@ -6,18 +6,20 @@
  * LICENSE file in the root directory of this source tree.
  */
 
+#include <cmath>
+#include <tuple>
+
+#include <xa_nnlib_kernels_api.h>
+
 #include <executorch/kernels/portable/cpu/util/normalization_ops_util.h>
 #include <executorch/kernels/portable/cpu/vec_ops.h>
 #include <executorch/runtime/kernel/kernel_includes.h>
-#include <xa_nnlib_kernels_api.h>
-#include <cmath>
-#include <tuple>
 
-using Tensor = exec_aten::Tensor;
-using ScalarType = exec_aten::ScalarType;
-using IntArrayRef = exec_aten::ArrayRef<int64_t>;
-using torch::executor::Error;
-using torch::executor::KernelRuntimeContext;
+using ::executorch::aten::IntArrayRef;
+using ::executorch::aten::ScalarType;
+using ::executorch::aten::Tensor;
+using ::executorch::runtime::Error;
+using ::executorch::runtime::KernelRuntimeContext;
 
 namespace cadence {
 namespace impl {
@@ -255,4 +257,4 @@ std::tuple<Tensor&, Tensor&, Tensor&> native_layer_norm_out(
 } // namespace native
 } // namespace G3
 } // namespace impl
-} // namespace cadence
\ No newline at end of file
+} // namespace cadence
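The other change repeated across all five files is mechanical: system headers first, then the NNLib header, then ExecuTorch headers, and using-declarations moved from the legacy exec_aten:: / torch::executor:: names to the canonical ::executorch::aten and ::executorch::runtime namespaces. A sketch of the compatibility-alias layering this migration presumes (illustrative only; the real declarations live in the ExecuTorch runtime headers):

// Canonical home of the runtime types (assumed minimal shape).
namespace executorch {
namespace runtime {
enum class Error { Ok, InvalidArgument /* ... */ };
} // namespace runtime
} // namespace executorch

// Legacy namespace re-exports the canonical type for older call sites.
namespace torch {
namespace executor {
using ::executorch::runtime::Error;
} // namespace executor
} // namespace torch

// After this PR the operator files name the canonical namespace directly,
// so they keep compiling even if the legacy aliases are ever retired:
using ::executorch::runtime::Error;

The leading :: qualification also guards against the names being shadowed by anything declared inside cadence::impl::G3::native.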