Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
119 changes: 95 additions & 24 deletions backends/cadence/fusion_g3/operators/op_add.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -13,18 +13,27 @@
#include <executorch/runtime/platform/assert.h>
#include <xa_nnlib_kernels_api.h>

using exec_aten::Scalar;
using exec_aten::ScalarType;
using exec_aten::Tensor;
using executorch::runtime::canCast;
using torch::executor::Error;
using torch::executor::KernelRuntimeContext;
using ::executorch::aten::Scalar;
using ::executorch::aten::ScalarType;
using ::executorch::aten::Tensor;
using ::executorch::runtime::canCast;
using ::executorch::runtime::Error;
using ::executorch::runtime::KernelRuntimeContext;

namespace cadence {
namespace impl {
namespace G3 {
namespace native {

// Calls the NNLib kernel `kernel` with the trailing arguments and checks that
// it reports success, i.e. that it returns 0.
//
//   ctx    - kernel runtime context, forwarded to ET_KERNEL_CHECK_MSG.
//   out    - forwarded to ET_KERNEL_CHECK_MSG as the value to return on failure.
//   kernel - name of the function to call; also stringified into the message.
//   ...    - arguments passed verbatim to `kernel` (and stringified as well).
//
// A non-zero result is reported through ET_KERNEL_CHECK_MSG with
// InvalidArgument. NOTE(review): that macro is presumed to mark `ctx` as failed
// and `return out;` from the enclosing function -- confirm against its
// definition.
//
// The expansion is wrapped in do { ... } while (0) so that it is a single
// statement: `ret` is scoped to one expansion (the macro can be used several
// times in the same scope) and the macro is safe as the body of an unbraced
// if/else. The definition deliberately has no trailing semicolon; call sites
// supply it. The local keeps the name `ret` so the stringified condition
// ("ret == 0") passed to ET_KERNEL_CHECK_MSG is unchanged; consequently the
// kernel arguments must not themselves refer to a variable named `ret`.
#define XT_KERNEL_CHECK(ctx, out, kernel, ...)                    \
  do {                                                            \
    const auto ret = kernel(__VA_ARGS__);                         \
    ET_KERNEL_CHECK_MSG(                                          \
        ctx,                                                      \
        ret == 0,                                                 \
        InvalidArgument,                                          \
        out,                                                      \
        "Failed to run kernel: " #kernel "(" #__VA_ARGS__ ")");   \
  } while (0)

Tensor& add_out(
KernelRuntimeContext& ctx,
const Tensor& a,
Expand Down Expand Up @@ -121,13 +130,30 @@ Tensor& add_out(
torch::executor::native::utils::extract_scalar(alpha, &alpha_val);

if ((a.numel() == 1) && (alpha_val == 1)) {
xa_nn_elm_add_scalar_32x32_32(
out_data, inp2_data, inp1_data[0], alpha_val, out.numel());
XT_KERNEL_CHECK(
ctx,
out,
xa_nn_elm_add_scalar_32x32_32,
out_data,
inp2_data,
inp1_data[0],
alpha_val,
out.numel());
} else if (b.numel() == 1) {
xa_nn_elm_add_scalar_32x32_32(
out_data, inp1_data, inp2_data[0], alpha_val, out.numel());
XT_KERNEL_CHECK(
ctx,
out,
xa_nn_elm_add_scalar_32x32_32,
out_data,
inp1_data,
inp2_data[0],
alpha_val,
out.numel());
} else if (broadcast) {
xa_nn_elm_add_broadcast_5D_32x32_32(
XT_KERNEL_CHECK(
ctx,
out,
xa_nn_elm_add_broadcast_5D_32x32_32,
out_data,
out_shape,
inp1_data,
Expand All @@ -137,8 +163,15 @@ Tensor& add_out(
max_dim,
alpha_val);
} else {
xa_nn_elm_add_32x32_32(
out_data, inp1_data, inp2_data, alpha_val, out.numel());
XT_KERNEL_CHECK(
ctx,
out,
xa_nn_elm_add_32x32_32,
out_data,
inp1_data,
inp2_data,
alpha_val,
out.numel());
}
} else if ((compute_type == ScalarType::Float) && (optimized)) {
const float* const inp1_data = a.const_data_ptr<float>();
Expand All @@ -149,13 +182,30 @@ Tensor& add_out(
torch::executor::native::utils::extract_scalar(alpha, &alpha_val);

if ((a.numel() == 1) && (alpha_val == 1.0)) {
xa_nn_elm_add_scalar_f32xf32_f32(
out_data, inp2_data, inp1_data[0], alpha_val, out.numel());
XT_KERNEL_CHECK(
ctx,
out,
xa_nn_elm_add_scalar_f32xf32_f32,
out_data,
inp2_data,
inp1_data[0],
alpha_val,
out.numel());
} else if (b.numel() == 1) {
xa_nn_elm_add_scalar_f32xf32_f32(
out_data, inp1_data, inp2_data[0], alpha_val, out.numel());
XT_KERNEL_CHECK(
ctx,
out,
xa_nn_elm_add_scalar_f32xf32_f32,
out_data,
inp1_data,
inp2_data[0],
alpha_val,
out.numel());
} else if (broadcast) {
xa_nn_elm_add_broadcast_5D_f32xf32_f32(
XT_KERNEL_CHECK(
ctx,
out,
xa_nn_elm_add_broadcast_5D_f32xf32_f32,
out_data,
out_shape,
inp1_data,
Expand All @@ -165,8 +215,15 @@ Tensor& add_out(
max_dim,
alpha_val);
} else {
xa_nn_elm_add_f32xf32_f32(
out_data, inp1_data, inp2_data, alpha_val, out.numel());
XT_KERNEL_CHECK(
ctx,
out,
xa_nn_elm_add_f32xf32_f32,
out_data,
inp1_data,
inp2_data,
alpha_val,
out.numel());
}
} else {
ET_SWITCH_REALB_TYPES(compute_type, ctx, op_name, CTYPE_COMPUTE, [&]() {
Expand Down Expand Up @@ -242,8 +299,15 @@ Tensor& add_scalar_out(

int* const out_data = out.mutable_data_ptr<int>();

xa_nn_elm_add_scalar_32x32_32(
out_data, inp1_data, inp2_val, alpha_val, out.numel());
XT_KERNEL_CHECK(
ctx,
out,
xa_nn_elm_add_scalar_32x32_32,
out_data,
inp1_data,
inp2_val,
alpha_val,
out.numel());

} else if (compute_type == ScalarType::Float) {
const float* const inp1_data = a.const_data_ptr<float>();
Expand All @@ -255,8 +319,15 @@ Tensor& add_scalar_out(

float* const out_data = out.mutable_data_ptr<float>();

xa_nn_elm_add_scalar_f32xf32_f32(
out_data, inp1_data, inp2_val, alpha_val, out.numel());
XT_KERNEL_CHECK(
ctx,
out,
xa_nn_elm_add_scalar_f32xf32_f32,
out_data,
inp1_data,
inp2_val,
alpha_val,
out.numel());

} else {
ET_SWITCH_REALB_TYPES(compute_type, ctx, op_name, CTYPE_COMPUTE, [&]() {
Expand Down
65 changes: 46 additions & 19 deletions backends/cadence/fusion_g3/operators/targets.bzl
Original file line number Diff line number Diff line change
@@ -1,6 +1,45 @@
load("@fbsource//tools/build_defs:platform_defs.bzl", "CXX")
load("@fbsource//xplat/executorch/build:runtime_wrapper.bzl", "runtime")

def define_operator(name: str, deps: list[str] | None = None) -> None:
    """Declares the `op_<name>` C++ library target for a single operator.

    The target is built from `op_<name>.cpp` and exports `:operators_header`.

    Args:
        name: Operator name without the `op_` prefix (e.g. "add").
        deps: Optional extra dependencies for this operator; they are placed
            ahead of the dependencies shared by all operators.
    """
    target = "op_" + name
    extra_deps = [] if deps == None else deps

    # Dependencies that every operator library links against.
    shared_deps = [
        "//executorch/kernels/portable/cpu/util:all_deps",
        "//executorch/kernels/portable/cpu/pattern:all_deps",
        "//executorch/runtime/kernel:kernel_includes",
        "//executorch/kernels/portable/cpu:scalar_utils",
        "fbsource//third-party/nnlib-FusionG3/xa_nnlib:libxa_nnlib_common",
        "fbsource//third-party/nnlib-FusionG3/xa_nnlib:libxa_nnlib",
    ]

    runtime.cxx_library(
        name = target,
        srcs = [target + ".cpp"],
        platforms = CXX,
        visibility = [
            "//executorch/backends/cadence/...",
            "@EXECUTORCH_CLIENTS",
        ],
        deps = extra_deps + shared_deps,
        exported_deps = [
            ":operators_header",
        ],
    )

# Operator names, without the `op_` prefix. Each entry is passed to
# define_operator(), which declares an `op_<name>` library built from
# `op_<name>.cpp`.
OPERATORS = [
"add",
"cat",
"dequantize",
"mul",
"native_layer_norm",
"quantize",
"softmax",
]

def define_common_targets():
"""Defines targets that should be shared between fbcode and xplat.

Expand All @@ -11,28 +50,16 @@ def define_common_targets():
# Define build targets for all operators registered in the tables above.

runtime.cxx_library(
name = "cadence_g3_ops",
srcs = glob([
"*.cpp",
]),
exported_headers = glob([
"*.h",
]),
platforms = CXX,
deps = [
"//executorch/kernels/portable/cpu/util:all_deps",
"//executorch/kernels/portable/cpu/pattern:all_deps",
"//executorch/runtime/kernel:kernel_includes",
"//executorch/kernels/portable/cpu:scalar_utils",
"fbsource//third-party/nnlib-FusionG3/xa_nnlib:libxa_nnlib_common",
"fbsource//third-party/nnlib-FusionG3/xa_nnlib:libxa_nnlib",
],
name = "operators_header",
exported_headers = ["operators.h"],
visibility = [
"//executorch/backends/cadence/...",
"@EXECUTORCH_CLIENTS",
],
exported_deps = [
"fbsource//third-party/nnlib-FusionG3/xa_nnlib:libxa_nnlib_common",
"fbsource//third-party/nnlib-FusionG3/xa_nnlib:libxa_nnlib",
"//executorch/runtime/core/exec_aten:lib",
"//executorch/runtime/kernel:kernel_runtime_context",
],
)

for op in OPERATORS:
define_operator(op)
33 changes: 25 additions & 8 deletions backends/cadence/fusion_g3/operators/tests/test_op_add.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@
#include <stdio.h>

#include <executorch/backends/cadence/fusion_g3/operators/operators.h>
#include <executorch/kernels/test/TestUtil.h>
#include <executorch/runtime/core/error.h>
#include <executorch/runtime/core/exec_aten/exec_aten.h>
#include <executorch/runtime/core/exec_aten/testing_util/tensor_factory.h>
#include <executorch/runtime/core/exec_aten/testing_util/tensor_util.h>
Expand All @@ -24,24 +26,19 @@ namespace {
using ::executorch::aten::Scalar;
using ::executorch::aten::ScalarType;
using ::executorch::aten::Tensor;
using ::executorch::aten::TensorImpl;
using ::executorch::runtime::Error;
using ::executorch::runtime::KernelRuntimeContext;
using ::executorch::runtime::runtime_init;
using ::executorch::runtime::testing::TensorFactory;
using ::testing::Test;

class FusionG3OperatorTest : public Test {
class FusionG3OperatorTest : public OperatorTest {
public:
void SetUp() override {
runtime_init();
}

protected:
Tensor&
add_out(const Tensor& a, const Tensor& b, const Scalar& alpha, Tensor& out) {
return cadence::impl::G3::native::add_out(context_, a, b, alpha, out);
}

KernelRuntimeContext context_;
};

TEST_F(FusionG3OperatorTest, TwoDimFloatTensorAddTest) {
Expand Down Expand Up @@ -77,6 +74,26 @@ TEST_F(FusionG3OperatorTest, AddWithBroadcastTest) {
EXPECT_TENSOR_EQ(out, tf.full(size_a, 2));
}

// Verifies that add_out reports a kernel failure when one input tensor has no
// backing data.
TEST_F(FusionG3OperatorTest, KernelCheckTest) {
  TensorFactory<ScalarType::Float> factory;

  // Shapes for a broadcast add.
  const std::vector<TensorImpl::SizesType> lhs_sizes{1, 3, 2, 4};
  const std::vector<TensorImpl::SizesType> rhs_sizes{2, 4};

  const Tensor rhs = factory.ones(rhs_sizes);
  Tensor out = factory.zeros(lhs_sizes);

  // Build a tensor that mirrors `rhs` (dtype, rank, sizes, dim order) but whose
  // data pointer is null, so that the kernel check fails.
  auto* const rhs_sizes_ptr =
      const_cast<TensorImpl::SizesType*>(rhs.sizes().data());
  auto* const rhs_dim_order_ptr =
      const_cast<TensorImpl::DimOrderType*>(rhs.dim_order().data());
  TensorImpl null_data_impl(
      rhs.scalar_type(),
      rhs.dim(),
      rhs_sizes_ptr,
      /*data=*/nullptr,
      rhs_dim_order_ptr);
  Tensor null_data_tensor(&null_data_impl);

  ET_EXPECT_KERNEL_FAILURE(
      context_, add_out(factory.ones(lhs_sizes), null_data_tensor, 1, out));
}

} // namespace
} // namespace native
} // namespace G3
Expand Down
3 changes: 3 additions & 0 deletions backends/cadence/runtime/TARGETS
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
load(":targets.bzl", "define_common_targets")
load("@fbcode_macros//build_defs:python_library.bzl", "python_library")

oncall("odai_jarvis")
Expand All @@ -22,3 +23,5 @@ python_library(
"//executorch/exir:lib",
],
)

define_common_targets()
Loading