diff --git a/backends/cadence/aot/compiler.py b/backends/cadence/aot/compiler.py
index 3f98745093c..937e3e39bc1 100644
--- a/backends/cadence/aot/compiler.py
+++ b/backends/cadence/aot/compiler.py
@@ -235,12 +235,12 @@ def quantize_and_export_to_cadence(
 def export_to_executorch_gen_etrecord(
     model: torch.nn.Module,
     inputs: tuple[object, ...],
-    dump_graphs: bool = False,
     output_dir: Optional[str] = None,
     opt_level: int = 1,
+    dump_graphs: bool = False,
 ) -> ExecutorchProgramManager:
-    edge_prog_manager = export_to_edge(model, inputs)
     cadence_passes = get_cadence_passes(opt_level)
+    edge_prog_manager = export_to_edge(model, inputs, dump_graphs)
 
     # Run a couple required passes for quant/dequant ops
     cadence_prog_manager = edge_prog_manager.transform(
diff --git a/backends/cadence/hifi/operators/op_add.cpp b/backends/cadence/hifi/operators/op_add.cpp
index 43cb0d8cd62..ec0e48e3791 100644
--- a/backends/cadence/hifi/operators/op_add.cpp
+++ b/backends/cadence/hifi/operators/op_add.cpp
@@ -9,6 +9,8 @@
 #include
 #include
 #include
+#include <executorch/kernels/portable/cpu/util/dtype_util.h>
+#include <executorch/kernels/portable/cpu/util/elementwise_util.h>
 #include
 #include
 #include
@@ -121,7 +123,7 @@ Tensor& add_out(
   float alpha_val;
   torch::executor::native::utils::extract_scalar(alpha, &alpha_val);
 
-  constexpr auto name = "add.out";
+  static constexpr const char op_name[] = "add.out";
   constexpr int kNnlibMaxDim = 4; /*fallback if broadcast and dim > 4 */
 
   int a_dim = a.dim(), b_dim = b.dim(), out_dim = out.dim();
@@ -178,23 +180,25 @@ Tensor& add_out(
     return out;
   }
 
-  ET_SWITCH_REALHBBF16_TYPES(a_type, ctx, name, CTYPE_A, [&]() {
-    ET_SWITCH_REALHBBF16_TYPES(b_type, ctx, name, CTYPE_B, [&]() {
-      using CTYPE_IN = typename torch::executor::
-          promote_types<CTYPE_A, CTYPE_B>::type;
-      ET_DCHECK(CppTypeToScalarType<CTYPE_IN>::value == common_type);
-      CTYPE_IN alpha_val;
-      torch::executor::native::utils::extract_scalar(alpha, &alpha_val);
-
-      ET_SWITCH_REALHBBF16_TYPES(out_type, ctx, name, CTYPE_OUT, [&]() {
-        AddInner<
-            can_cast<CTYPE_IN, CTYPE_OUT>::value,
-            CTYPE_A,
-            CTYPE_B,
-            CTYPE_IN,
-            CTYPE_OUT>::run(a, b, alpha_val, out);
-      });
-    });
+  // Compute Dtype
+  ScalarType compute_type =
+      torch::executor::native::utils::get_compute_type(common_type);
+
+  ET_SWITCH_REALB_TYPES(compute_type, ctx, op_name, CTYPE_COMPUTE, [&]() {
+    const CTYPE_COMPUTE val_alpha =
+        torch::executor::native::utils::scalar_to<CTYPE_COMPUTE>(alpha);
+    torch::executor::native::utils::
+        apply_bitensor_elementwise_fn<CTYPE_COMPUTE, op_name>(
+            [val_alpha](const CTYPE_COMPUTE val_a, const CTYPE_COMPUTE val_b) {
+              return val_a + val_alpha * val_b;
+            },
+            ctx,
+            a,
+            torch::executor::native::utils::SupportedTensorDtypes::REALHBBF16,
+            b,
+            torch::executor::native::utils::SupportedTensorDtypes::REALHBBF16,
+            out,
+            torch::executor::native::utils::SupportedTensorDtypes::REALHBBF16);
   });
 
   return out;
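The add_out change above is the template for the rest of this patch: the old triple-nested ET_SWITCH (one kernel instantiation per (a, b, out) dtype combination) collapses into a single switch over a compute dtype, with apply_bitensor_elementwise_fn loading each element as CTYPE_COMPUTE, applying the lambda, and storing to the output dtype. A minimal same-shape sketch of that pattern follows; the function name and signatures are simplified stand-ins for illustration, not the real torch::executor::native::utils API, which also handles broadcasting and dtype validation.

```cpp
#include <cstddef>
#include <vector>

// Hypothetical stand-in for utils::apply_bitensor_elementwise_fn: load both
// inputs as the compute type, apply `fn`, store as the output type.
template <typename CTYPE_COMPUTE, typename CTYPE_A, typename CTYPE_B,
          typename CTYPE_OUT, typename Fn>
void apply_bitensor_elementwise(const CTYPE_A* a, const CTYPE_B* b,
                                CTYPE_OUT* out, size_t numel, Fn fn) {
  for (size_t i = 0; i < numel; ++i) {
    const CTYPE_COMPUTE va = static_cast<CTYPE_COMPUTE>(a[i]);
    const CTYPE_COMPUTE vb = static_cast<CTYPE_COMPUTE>(b[i]);
    out[i] = static_cast<CTYPE_OUT>(fn(va, vb));
  }
}

int main() {
  std::vector<int> a{1, 2, 3}, b{4, 5, 6};
  std::vector<float> out(3);
  const float alpha = 2.0f; // alpha-scaled add, computed in float as above
  apply_bitensor_elementwise<float>(
      a.data(), b.data(), out.data(), a.size(),
      [alpha](float x, float y) { return x + alpha * y; });
  return 0;
}
```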
diff --git a/backends/cadence/hifi/operators/op_div.cpp b/backends/cadence/hifi/operators/op_div.cpp
index 88e670b432f..05f3db7ec31 100644
--- a/backends/cadence/hifi/operators/op_div.cpp
+++ b/backends/cadence/hifi/operators/op_div.cpp
@@ -9,6 +9,8 @@
 #include
 #include
 #include
+#include <executorch/kernels/portable/cpu/util/dtype_util.h>
+#include <executorch/kernels/portable/cpu/util/elementwise_util.h>
 #include
 #include
 #include
@@ -134,25 +136,26 @@ div_out(RuntimeContext& ctx, const Tensor& a, const Tensor& b, Tensor& out) {
       InvalidArgument,
       out);
 
-  ET_SWITCH_REAL_TYPES_AND(Bool, a_type, ctx, "div.out", CTYPE_A, [&]() {
-    ET_SWITCH_REAL_TYPES_AND(Bool, b_type, ctx, "div.out", CTYPE_B, [&]() {
-      ET_SWITCH_FLOAT_TYPES(common_type, ctx, "div.out", CTYPE_IN, [&]() {
-        ET_SWITCH_FLOAT_TYPES(out_type, ctx, "div.out", CTYPE_OUT, [&]() {
-          torch::executor::
-              apply_binary_elementwise_fn<CTYPE_A, CTYPE_B, CTYPE_OUT>(
-                  [](const CTYPE_A val_a, const CTYPE_B val_b) {
-                    CTYPE_IN a_casted = static_cast<CTYPE_IN>(val_a);
-                    CTYPE_IN b_casted = static_cast<CTYPE_IN>(val_b);
-                    CTYPE_IN value = a_casted / b_casted;
-
-                    return static_cast<CTYPE_OUT>(value);
-                  },
-                  a,
-                  b,
-                  out);
-        });
-      });
-    });
+  // Compute Dtype
+  ScalarType compute_type =
+      torch::executor::native::utils::get_compute_type(common_type);
+
+  // @lint-ignore CLANGTIDY facebook-hte-CArray
+  static constexpr const char op_name[] = "div.out";
+
+  ET_SWITCH_FLOAT_TYPES(compute_type, ctx, op_name, CTYPE_COMPUTE, [&]() {
+    torch::executor::native::utils::
+        apply_bitensor_elementwise_fn<CTYPE_COMPUTE, op_name>(
+            [](const CTYPE_COMPUTE val_a, const CTYPE_COMPUTE val_b) {
+              return val_a / val_b;
+            },
+            ctx,
+            a,
+            torch::executor::native::utils::SupportedTensorDtypes::REALHBBF16,
+            b,
+            torch::executor::native::utils::SupportedTensorDtypes::REALHBBF16,
+            out,
+            torch::executor::native::utils::SupportedTensorDtypes::FLOATHBF16);
   });
 
   return out;
@@ -254,35 +257,59 @@ Tensor& div_out_mode(
     return out;
   }
 
-  ET_SWITCH_REAL_TYPES_AND(Bool, a_type, ctx, "div.out_mode", CTYPE_A, [&]() {
-    ET_SWITCH_REAL_TYPES_AND(Bool, b_type, ctx, "div.out_mode", CTYPE_B, [&]() {
-      ET_SWITCH_FLOAT_TYPES(common_type, ctx, "div.out_mode", CTYPE_IN, [&]() {
-        ET_SWITCH_REAL_TYPES(out_type, ctx, "div.out_mode", CTYPE_OUT, [&]() {
-          torch::executor::
-              apply_binary_elementwise_fn<CTYPE_A, CTYPE_B, CTYPE_OUT>(
-                  [mode](const CTYPE_A val_a, const CTYPE_B val_b) {
-                    CTYPE_IN a_casted = static_cast<CTYPE_IN>(val_a);
-                    CTYPE_IN b_casted = static_cast<CTYPE_IN>(val_b);
-                    CTYPE_IN value = a_casted / b_casted;
-                    if (mode.has_value() && mode.value() == "trunc") {
-                      value = std::trunc(value);
-                    } else if (mode.has_value() && mode.value() == "floor") {
-                      value = std::floor(value);
-                    }
-                    return static_cast<CTYPE_OUT>(value);
-                  },
-                  a,
-                  b,
-                  out);
-        });
-      });
-    });
+  bool div_by_zero_error = false;
+  const bool mode_is_trunc = (mode.has_value() && mode.value() == "trunc");
+  // Compute Dtype
+  ScalarType compute_type =
+      torch::executor::native::utils::get_compute_type(common_type);
+
+  // @lint-ignore CLANGTIDY facebook-hte-CArray
+  static constexpr const char op_name[] = "div.out_mode";
+
+  ET_SWITCH_REAL_TYPES(compute_type, ctx, op_name, CTYPE_COMPUTE, [&]() {
+    torch::executor::native::utils::
+        apply_bitensor_elementwise_fn<CTYPE_COMPUTE, op_name>(
+            [mode_is_trunc, &div_by_zero_error](
+                const CTYPE_COMPUTE val_a, const CTYPE_COMPUTE val_b) {
+              if (executorch::runtime::is_integral_type<
+                      CTYPE_COMPUTE,
+                      /*includeBool=*/true>::value) {
+                if (val_b == 0) {
+                  div_by_zero_error = true;
+                  return static_cast<CTYPE_COMPUTE>(0);
+                }
+              }
+              CTYPE_COMPUTE value = val_a / val_b;
+              if (mode_is_trunc) {
+                value = std::trunc(value);
+              } else {
+                // We established above that the mode is either trunc or
+                // floor, so it must be floor.
+                value =
+                    torch::executor::native::utils::floor_divide(val_a, val_b);
+              }
+              return value;
+            },
+            ctx,
+            a,
+            torch::executor::native::utils::SupportedTensorDtypes::REALHBBF16,
+            b,
+            torch::executor::native::utils::SupportedTensorDtypes::REALHBBF16,
+            out,
+            torch::executor::native::utils::SupportedTensorDtypes::REALHBF16);
   });
 
+  ET_KERNEL_CHECK_MSG(
+      ctx,
+      !div_by_zero_error,
+      InvalidArgument,
+      out,
+      "Div mode operation encountered integer division by zero");
+
   return out;
 }
 
 } // namespace native
 } // namespace HiFi
 } // namespace impl
-} // namespace cadence
\ No newline at end of file
+} // namespace cadence
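Two details of the div_out_mode rewrite above are easy to miss: the element lambda cannot raise errors from inside the loop, so integer division by zero only sets a captured flag that ET_KERNEL_CHECK_MSG inspects once afterwards, and floor mode rounds the quotient toward negative infinity, which differs from C++'s truncating `/` whenever the operands have mixed signs. A small self-contained illustration, with floor_divide as an assumed float-only stand-in for the utils helper:

```cpp
#include <cmath>
#include <cstdio>

// Stand-in for utils::floor_divide (assumed float semantics): round the
// quotient toward negative infinity rather than toward zero.
static float floor_divide(float a, float b) {
  return std::floor(a / b);
}

int main() {
  // trunc(-7 / 2) == -3, but floor_divide(-7, 2) == -4.
  std::printf("trunc: %g\n", std::trunc(-7.0f / 2.0f));
  std::printf("floor: %g\n", floor_divide(-7.0f, 2.0f));
  return 0;
}
```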
diff --git a/backends/cadence/hifi/operators/op_mean.cpp b/backends/cadence/hifi/operators/op_mean.cpp
index 478e10da712..ed5ed3359e5 100644
--- a/backends/cadence/hifi/operators/op_mean.cpp
+++ b/backends/cadence/hifi/operators/op_mean.cpp
@@ -6,6 +6,7 @@
  * LICENSE file in the root directory of this source tree.
  */
 
+#include
 #include
 #include
 #include
@@ -141,11 +142,11 @@ Tensor& mean_dim_out(
     return out;
   }
 
-  ET_SWITCH_REALHB_TYPES(in.scalar_type(), ctx, name, CTYPE_IN, [&] {
-    ET_SWITCH_FLOATH_TYPES(out.scalar_type(), ctx, name, CTYPE_OUT, [&] {
+  ET_SWITCH_REALHB_TYPES(in.scalar_type(), ctx, "mean.out", CTYPE_IN, [&] {
+    ET_SWITCH_FLOATH_TYPES(out.scalar_type(), ctx, "mean.out", CTYPE_OUT, [&] {
       CTYPE_OUT* out_data = out.mutable_data_ptr<CTYPE_OUT>();
-      const size_t num = torch::executor::get_reduced_dim_product(in, dim_list);
-
+      const size_t num =
+          torch::executor::get_reduced_dim_product(in, dim_list);
       for (size_t out_ix = 0; out_ix < out.numel(); ++out_ix) {
         CTYPE_OUT sum = 0;
         if (in.numel() > 0) {
diff --git a/backends/cadence/hifi/operators/op_mul.cpp b/backends/cadence/hifi/operators/op_mul.cpp
index ad12606bdf6..396833dd1af 100644
--- a/backends/cadence/hifi/operators/op_mul.cpp
+++ b/backends/cadence/hifi/operators/op_mul.cpp
@@ -9,6 +9,8 @@
 #include
 #include
 #include
+#include <executorch/kernels/portable/cpu/util/dtype_util.h>
+#include <executorch/kernels/portable/cpu/util/elementwise_util.h>
 #include
 #include
 #include
@@ -144,20 +146,26 @@ mul_out(RuntimeContext& ctx, const Tensor& a, const Tensor& b, Tensor& out) {
     return out;
   }
 
-  ET_SWITCH_REALHB_TYPES(a_type, ctx, "mul.out", CTYPE_A, [&]() {
-    ET_SWITCH_REALHB_TYPES(b_type, ctx, "mul.out", CTYPE_B, [&]() {
-      using CTYPE_IN = typename torch::executor::
-          promote_types<CTYPE_A, CTYPE_B>::type;
-      ET_DCHECK(CppTypeToScalarType<CTYPE_IN>::value == common_type);
-      ET_SWITCH_REALHB_TYPES(out_type, ctx, "mul.out", CTYPE_OUT, [&]() {
-        MulInner<
-            can_cast<CTYPE_IN, CTYPE_OUT>::value,
-            CTYPE_A,
-            CTYPE_B,
-            CTYPE_IN,
-            CTYPE_OUT>::run(a, b, out);
-      });
-    });
+  // Compute Dtype
+  ScalarType compute_type =
+      torch::executor::native::utils::get_compute_type(common_type);
+
+  // @lint-ignore CLANGTIDY facebook-hte-CArray
+  static constexpr const char op_name[] = "mul.out";
+
+  ET_SWITCH_REALB_TYPES(compute_type, ctx, op_name, CTYPE_COMPUTE, [&]() {
+    torch::executor::native::utils::
+        apply_bitensor_elementwise_fn<CTYPE_COMPUTE, op_name>(
+            [](const CTYPE_COMPUTE val_a, const CTYPE_COMPUTE val_b) {
+              return val_a * val_b;
+            },
+            ctx,
+            a,
+            torch::executor::native::utils::SupportedTensorDtypes::REALHBBF16,
+            b,
+            torch::executor::native::utils::SupportedTensorDtypes::REALHBBF16,
+            out,
+            torch::executor::native::utils::SupportedTensorDtypes::REALHBBF16);
   });
 
   return out;
@@ -166,4 +174,4 @@ mul_out(RuntimeContext& ctx, const Tensor& a, const Tensor& b, Tensor& out) {
 } // namespace native
 } // namespace HiFi
 } // namespace impl
-} // namespace cadence
\ No newline at end of file
+} // namespace cadence
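Every rewritten kernel in this patch funnels the promoted dtype through utils::get_compute_type before dispatching. Judging from the call sites, the helper's job is to pick the type the element lambda actually computes in; the sketch below captures that assumed behavior (the real function lives in the newly added dtype_util dependency, and its exact rules may differ):

```cpp
#include <cassert>

enum class ScalarType { Bool, Int, Float, Half, BFloat16 };

// Assumed behavior of utils::get_compute_type, inferred from the call sites
// above: reduced-precision float dtypes are widened to Float so the lambda
// runs in full precision; other dtypes compute as themselves.
ScalarType get_compute_type(ScalarType t) {
  return (t == ScalarType::Half || t == ScalarType::BFloat16)
      ? ScalarType::Float
      : t;
}

int main() {
  assert(get_compute_type(ScalarType::Half) == ScalarType::Float);
  assert(get_compute_type(ScalarType::Int) == ScalarType::Int);
  return 0;
}
```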
diff --git a/backends/cadence/hifi/operators/op_sigmoid.cpp b/backends/cadence/hifi/operators/op_sigmoid.cpp
index b9fa73b879f..35321cc27eb 100644
--- a/backends/cadence/hifi/operators/op_sigmoid.cpp
+++ b/backends/cadence/hifi/operators/op_sigmoid.cpp
@@ -9,6 +9,8 @@
 #include
 #include
+#include <executorch/kernels/portable/cpu/util/dtype_util.h>
+#include <executorch/kernels/portable/cpu/util/elementwise_util.h>
 #include
 #include
@@ -58,19 +60,27 @@ Tensor& sigmoid_out(RuntimeContext& ctx, const Tensor& in, Tensor& out) {
     return out;
   }
 
-  ET_SWITCH_REALHB_TYPES(in_type, ctx, "sigmoid.out", CTYPE_IN, [&]() {
-    ET_SWITCH_FLOATH_TYPES(out_type, ctx, "sigmoid.out", CTYPE_OUT, [&]() {
-      torch::executor::apply_unary_map_fn(
-          [](const CTYPE_IN val_in) {
-            // perform math in double to preserve precision
-            double in_casted = static_cast<double>(val_in);
-            double out_val = 1.0 / (1.0 + exp(-in_casted));
-            return static_cast<CTYPE_OUT>(out_val);
-          },
-          in.const_data_ptr<CTYPE_IN>(),
-          out.mutable_data_ptr<CTYPE_OUT>(),
-          in.numel());
-    });
+  ScalarType compute_type =
+      executorch::runtime::isFloatingType(in.scalar_type()) ? in.scalar_type()
+                                                            : ScalarType::Float;
+  compute_type = torch::executor::native::utils::get_compute_type(compute_type);
+
+  // @lint-ignore CLANGTIDY facebook-hte-CArray
+  static constexpr const char op_name[] = "sigmoid.out";
+
+  ET_SWITCH_FLOAT_TYPES(compute_type, ctx, op_name, CTYPE_COMPUTE, [&]() {
+    torch::executor::native::utils::
+        apply_unitensor_elementwise_fn<CTYPE_COMPUTE, op_name>(
+            [](const CTYPE_COMPUTE val_in) {
+              CTYPE_COMPUTE out_val = static_cast<CTYPE_COMPUTE>(1.0) /
+                  (static_cast<CTYPE_COMPUTE>(1.0) + exp(-val_in));
+              return out_val;
+            },
+            ctx,
+            in,
+            torch::executor::native::utils::SupportedTensorDtypes::REALHBBF16,
+            out,
+            torch::executor::native::utils::SupportedTensorDtypes::FLOATHBF16);
   });
 
   return out;
@@ -79,4 +89,4 @@ Tensor& sigmoid_out(RuntimeContext& ctx, const Tensor& in, Tensor& out) {
 } // namespace native
 } // namespace HiFi
 } // namespace impl
-} // namespace cadence
\ No newline at end of file
+} // namespace cadence
diff --git a/backends/cadence/hifi/operators/op_sub.cpp b/backends/cadence/hifi/operators/op_sub.cpp
index 0a362dbf959..cf10e414354 100644
--- a/backends/cadence/hifi/operators/op_sub.cpp
+++ b/backends/cadence/hifi/operators/op_sub.cpp
@@ -9,6 +9,8 @@
 #include
 #include
 #include
+#include <executorch/kernels/portable/cpu/util/dtype_util.h>
+#include <executorch/kernels/portable/cpu/util/elementwise_util.h>
 #include
 #include
 #include
@@ -176,22 +178,28 @@ Tensor& sub_out(
     return out;
   }
 
-  ET_SWITCH_REALH_TYPES(a_type, ctx, name, CTYPE_A, [&]() {
-    ET_SWITCH_REALH_TYPES(b_type, ctx, name, CTYPE_B, [&]() {
-      using CTYPE_IN = typename torch::executor::
-          promote_types<CTYPE_A, CTYPE_B>::type;
-      ET_DCHECK(CppTypeToScalarType<CTYPE_IN>::value == common_type);
-      CTYPE_IN alpha_val;
-      torch::executor::native::utils::extract_scalar(alpha, &alpha_val);
-      ET_SWITCH_REALH_TYPES(out_type, ctx, name, CTYPE_OUT, [&]() {
-        SubInner<
-            can_cast<CTYPE_IN, CTYPE_OUT>::value,
-            CTYPE_A,
-            CTYPE_B,
-            CTYPE_IN,
-            CTYPE_OUT>::run(a, b, alpha_val, out);
-      });
-    });
+  // Compute Dtype
+  ScalarType compute_type =
+      torch::executor::native::utils::get_compute_type(common_type);
+
+  // @lint-ignore CLANGTIDY facebook-hte-CArray
+  static constexpr const char op_name[] = "sub.out";
+
+  ET_SWITCH_REAL_TYPES(compute_type, ctx, op_name, CTYPE_COMPUTE, [&]() {
+    const CTYPE_COMPUTE val_alpha =
+        torch::executor::native::utils::scalar_to<CTYPE_COMPUTE>(alpha);
+    torch::executor::native::utils::
+        apply_bitensor_elementwise_fn<CTYPE_COMPUTE, op_name>(
+            [val_alpha](const CTYPE_COMPUTE val_a, const CTYPE_COMPUTE val_b) {
+              return val_a - val_alpha * val_b;
+            },
+            ctx,
+            a,
+            torch::executor::native::utils::SupportedTensorDtypes::REALHBF16,
+            b,
+            torch::executor::native::utils::SupportedTensorDtypes::REALHBF16,
+            out,
+            torch::executor::native::utils::SupportedTensorDtypes::REALHBF16);
   });
 
   return out;
@@ -200,4 +208,4 @@ Tensor& sub_out(
 } // namespace native
 } // namespace HiFi
 } // namespace impl
-} // namespace cadence
\ No newline at end of file
+} // namespace cadence
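Unlike add/mul/sub, sigmoid_out above picks its compute type from the input alone: integral inputs are promoted to Float before get_compute_type is applied, since 1/(1+e^-x) is only meaningful in floating point, and the output dtype set is correspondingly restricted to FLOATHBF16. A tiny standalone version of that promotion:

```cpp
#include <cmath>
#include <cstdio>

// Sigmoid is computed in float even for integral inputs, mirroring the
// compute_type selection in sigmoid_out above (integral -> Float).
static float sigmoid(float x) {
  return 1.0f / (1.0f + std::exp(-x));
}

int main() {
  const int inputs[] = {-2, 0, 2}; // e.g. an int32 input tensor
  for (int v : inputs) {
    std::printf("sigmoid(%d) = %f\n", v, sigmoid(static_cast<float>(v)));
  }
  return 0;
}
```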
diff --git a/backends/cadence/hifi/operators/op_where.cpp b/backends/cadence/hifi/operators/op_where.cpp
index 06bd0bc3c9f..c94d2ee65c5 100644
--- a/backends/cadence/hifi/operators/op_where.cpp
+++ b/backends/cadence/hifi/operators/op_where.cpp
@@ -8,6 +8,8 @@
 
 #include
 #include
+#include <executorch/kernels/portable/cpu/util/dtype_util.h>
+#include <executorch/kernels/portable/cpu/util/elementwise_util.h>
 #include
 #include
@@ -148,28 +150,43 @@ Tensor& where_out(
     }
     return out;
   }
-  ET_SWITCH_REALHB_TYPES(a_type, ctx, name, CTYPE_A, [&]() {
-    ET_SWITCH_REALHB_TYPES(b_type, ctx, name, CTYPE_B, [&]() {
-      using CTYPE_OUT =
-          typename torch::executor::promote_types<CTYPE_A, CTYPE_B>::type;
-      torch::executor::
-          apply_ternary_elementwise_fn<CTYPE_A, CTYPE_B, uint8_t, CTYPE_OUT>(
-              [](const CTYPE_A val_a,
-                 const CTYPE_B val_b,
-                 const uint8_t val_c) {
-                CTYPE_OUT a_casted = static_cast<CTYPE_OUT>(val_a);
-                CTYPE_OUT b_casted = static_cast<CTYPE_OUT>(val_b);
-                return val_c ? a_casted : b_casted;
-              },
-              a,
-              b,
-              cond,
-              out);
-    });
+
+  // Compute Dtype
+  ScalarType compute_type =
+      torch::executor::native::utils::get_compute_type(common_type);
+
+  // @lint-ignore CLANGTIDY facebook-hte-CArray
+  static constexpr const char op_name[] = "where.self_out";
+
+  ET_SWITCH_REALB_TYPES(compute_type, ctx, op_name, CTYPE_COMPUTE, [&]() {
+    torch::executor::native::utils::apply_tritensor_elementwise_fn<
+        CTYPE_COMPUTE,
+        op_name>(
+        [](const CTYPE_COMPUTE val_a,
+           const CTYPE_COMPUTE val_b,
+           const CTYPE_COMPUTE val_c) { return val_c ? val_a : val_b; },
+        ctx,
+        a,
+        torch::executor::native::utils::SupportedTensorDtypes::REALHBBF16,
+        b,
+        torch::executor::native::utils::SupportedTensorDtypes::REALHBBF16,
+        cond,
+        torch::executor::native::utils::SupportedTensorDtypes::BOOL_OR_BYTE,
+        out,
+        torch::executor::native::utils::SupportedTensorDtypes::SAME_AS_COMMON);
   });
 
   return out;
 }
 
+Tensor& where_self_out(
+    RuntimeContext& ctx,
+    const Tensor& cond,
+    const Tensor& a,
+    const Tensor& b,
+    Tensor& out) {
+  return cadence::impl::HiFi::native::where_out(ctx, cond, a, b, out);
+}
+
 } // namespace native
 } // namespace HiFi
 } // namespace impl
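The where_out rewrite above routes cond through the BOOL_OR_BYTE dtype set and selects elementwise, while the new where_self_out entry point is just a thin wrapper providing the cond-first argument order of the where.self_out signature. A same-shape sketch of the selection (broadcasting and dtype conversion omitted):

```cpp
#include <cstddef>
#include <cstdio>

// Elementwise select as computed by the where_out lambda above:
// out[i] = cond[i] ? a[i] : b[i]. Same-shape sketch only; the real helper
// also handles broadcasting and the bool-or-byte cond dtypes.
template <typename T>
void where_select(const bool* cond, const T* a, const T* b, T* out, size_t n) {
  for (size_t i = 0; i < n; ++i) {
    out[i] = cond[i] ? a[i] : b[i];
  }
}

int main() {
  const bool cond[] = {true, false, true};
  const float a[] = {1.f, 2.f, 3.f}, b[] = {10.f, 20.f, 30.f};
  float out[3];
  where_select(cond, a, b, out, 3);
  std::printf("%g %g %g\n", out[0], out[1], out[2]); // 1 20 3
  return 0;
}
```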
diff --git a/backends/cadence/hifi/operators/targets.bzl b/backends/cadence/hifi/operators/targets.bzl
index 96f063728c8..6c671a5f24a 100644
--- a/backends/cadence/hifi/operators/targets.bzl
+++ b/backends/cadence/hifi/operators/targets.bzl
@@ -11,20 +11,230 @@ def define_common_targets():
     # Define build targets for all operators registered in the tables above.
 
     runtime.cxx_library(
-        name = "cadence_hifi_ops",
-        srcs = glob([
-            "*.cpp",
-        ]),
-        exported_headers = glob(["*.h"]),
+        name = "quantize_per_tensor",
+        srcs = [
+            "quantize_per_tensor.cpp"
+        ],
+        platforms = CXX,
+        deps = [
+            "//executorch/kernels/portable/cpu/util:all_deps",
+            "//executorch/kernels/portable/cpu/pattern:all_deps",
+            "//executorch/runtime/kernel:kernel_includes",
+            "//executorch/kernels/portable/cpu:scalar_utils",
+            "//executorch/backends/cadence/hifi/kernels:kernels",
+            "//executorch/backends/cadence/hifi/third-party/nnlib:nnlib-extensions"
+        ],
+        visibility = [
+            "//executorch/backends/cadence/...",
+            "@EXECUTORCH_CLIENTS",
+        ],
+    )
+
+    runtime.cxx_library(
+        name = "dequantize_per_tensor",
+        srcs = [
+            "dequantize_per_tensor.cpp"
+        ],
+        platforms = CXX,
+        deps = [
+            "//executorch/kernels/portable/cpu/util:all_deps",
+            "//executorch/kernels/portable/cpu/pattern:all_deps",
+            "//executorch/runtime/kernel:kernel_includes",
+            "//executorch/kernels/portable/cpu:scalar_utils",
+            "//executorch/backends/cadence/hifi/kernels:kernels",
+            "//executorch/backends/cadence/hifi/third-party/nnlib:nnlib-extensions"
+        ],
+        visibility = [
+            "//executorch/backends/cadence/...",
+            "@EXECUTORCH_CLIENTS",
+        ],
+    )
+
+    runtime.cxx_library(
+        name = "quantized_layer_norm",
+        srcs = [
+            "quantized_layer_norm.cpp"
+        ],
+        exported_headers = ["operators.h"],
+        platforms = CXX,
+        deps = [
+            "//executorch/kernels/portable/cpu/util:all_deps",
+            "//executorch/kernels/portable/cpu/pattern:all_deps",
+            "//executorch/runtime/kernel:kernel_includes",
+            "//executorch/kernels/portable/cpu:scalar_utils",
+            "//executorch/backends/cadence/hifi/kernels:kernels",
+            "//executorch/backends/cadence/hifi/third-party/nnlib:nnlib-extensions"
+        ],
+        visibility = [
+            "//executorch/backends/cadence/...",
+            "@EXECUTORCH_CLIENTS",
+        ],
+    )
+
+    runtime.cxx_library(
+        name = "quantized_linear_out",
+        srcs = [
+            "quantized_linear_out.cpp"
+        ],
+        exported_headers = ["operators.h"],
+        platforms = CXX,
+        deps = [
+            "//executorch/kernels/portable/cpu/util:all_deps",
+            "//executorch/kernels/portable/cpu/pattern:all_deps",
+            "//executorch/runtime/kernel:kernel_includes",
+            "//executorch/kernels/portable/cpu:scalar_utils",
+            "//executorch/backends/cadence/hifi/kernels:kernels",
+            "//executorch/backends/cadence/hifi/third-party/nnlib:nnlib-extensions"
+        ],
+        visibility = [
+            "//executorch/backends/cadence/...",
+            "@EXECUTORCH_CLIENTS",
+        ],
+    )
+
+    runtime.cxx_library(
+        name = "op_add",
+        srcs = [
+            "op_add.cpp",
+        ],
         platforms = CXX,
         deps = [
             "//executorch/kernels/portable/cpu/util:all_deps",
             "//executorch/kernels/portable/cpu/pattern:all_deps",
             "//executorch/runtime/kernel:kernel_includes",
             "//executorch/kernels/portable/cpu:scalar_utils",
-            "fbsource//third-party/nnlib-hifi4/xa_nnlib:libxa_nnlib",
-            "fbsource//third-party/nnlib-hifi4/xa_nnlib:libxa_nnlib_common",
             "//executorch/backends/cadence/hifi/kernels:kernels",
+            "//executorch/backends/cadence/hifi/third-party/nnlib:nnlib-extensions",
+            "//executorch/kernels/portable/cpu/util:dtype_util",
+            "//executorch/kernels/portable/cpu/util:elementwise_util",
+        ],
+        visibility = [
+            "//executorch/backends/cadence/...",
+            "@EXECUTORCH_CLIENTS",
+        ],
+    )
+
+
+    runtime.cxx_library(
+        name = "op_mul",
+        srcs = [
+            "op_mul.cpp",
+        ],
+        platforms = CXX,
+        deps = [
+            "//executorch/kernels/portable/cpu/util:all_deps",
+            "//executorch/kernels/portable/cpu/pattern:all_deps",
+            "//executorch/runtime/kernel:kernel_includes",
+            "//executorch/kernels/portable/cpu:scalar_utils",
"//executorch/backends/cadence/hifi/kernels:kernels", + "//executorch/kernels/portable/cpu/util:dtype_util", + "//executorch/kernels/portable/cpu/util:elementwise_util", + "//executorch/backends/cadence/hifi/third-party/nnlib:nnlib-extensions" + ], + visibility = [ + "//executorch/backends/cadence/...", + "@EXECUTORCH_CLIENTS", + ], + ) + + runtime.cxx_library( + name = "op_sub", + srcs = [ + "op_sub.cpp", + ], + platforms = CXX, + deps = [ + "//executorch/kernels/portable/cpu/util:all_deps", + "//executorch/kernels/portable/cpu/pattern:all_deps", + "//executorch/runtime/kernel:kernel_includes", + "//executorch/kernels/portable/cpu:scalar_utils", + "//executorch/backends/cadence/hifi/kernels:kernels", + "//executorch/kernels/portable/cpu/util:dtype_util", + "//executorch/kernels/portable/cpu/util:elementwise_util", + "//executorch/backends/cadence/hifi/third-party/nnlib:nnlib-extensions" + ], + visibility = [ + "//executorch/backends/cadence/...", + "@EXECUTORCH_CLIENTS", + ], + ) + + runtime.cxx_library( + name = "op_div", + srcs = [ + "op_div.cpp", + ], + platforms = CXX, + deps = [ + "//executorch/kernels/portable/cpu/util:all_deps", + "//executorch/kernels/portable/cpu/pattern:all_deps", + "//executorch/runtime/kernel:kernel_includes", + "//executorch/kernels/portable/cpu:scalar_utils", + "//executorch/backends/cadence/hifi/kernels:kernels", + "//executorch/kernels/portable/cpu/util:dtype_util", + "//executorch/kernels/portable/cpu/util:elementwise_util", + "//executorch/backends/cadence/hifi/third-party/nnlib:nnlib-extensions" + ], + visibility = [ + "//executorch/backends/cadence/...", + "@EXECUTORCH_CLIENTS", + ], + ) + + runtime.cxx_library( + name = "op_sigmoid", + srcs = [ + "op_sigmoid.cpp", + ], + platforms = CXX, + deps = [ + "//executorch/kernels/portable/cpu/util:all_deps", + "//executorch/kernels/portable/cpu/pattern:all_deps", + "//executorch/runtime/kernel:kernel_includes", + "//executorch/backends/cadence/hifi/kernels:kernels", + "//executorch/kernels/portable/cpu/util:dtype_util", + "//executorch/kernels/portable/cpu/util:elementwise_util", + "//executorch/backends/cadence/hifi/third-party/nnlib:nnlib-extensions" + ], + visibility = [ + "//executorch/backends/cadence/...", + "@EXECUTORCH_CLIENTS", + ], + ) + + runtime.cxx_library( + name = "op_tanh", + srcs = [ + "op_tanh.cpp", + ], + platforms = CXX, + deps = [ + "//executorch/kernels/portable/cpu/util:all_deps", + "//executorch/kernels/portable/cpu/pattern:all_deps", + "//executorch/runtime/kernel:kernel_includes", + "//executorch/backends/cadence/hifi/kernels:kernels", + "//executorch/backends/cadence/hifi/third-party/nnlib:nnlib-extensions" + ], + visibility = [ + "//executorch/backends/cadence/...", + "@EXECUTORCH_CLIENTS", + ], + ) + + + runtime.cxx_library( + name = "op_where", + srcs = [ + "op_where.cpp", + ], + platforms = CXX, + deps = [ + "//executorch/kernels/portable/cpu/util:all_deps", + "//executorch/kernels/portable/cpu/pattern:all_deps", + "//executorch/runtime/kernel:kernel_includes", + "//executorch/backends/cadence/hifi/kernels:kernels", + "//executorch/kernels/portable/cpu/util:elementwise_util", + "//executorch/backends/cadence/hifi/third-party/nnlib:nnlib-extensions" ], visibility = [ "//executorch/backends/cadence/...", diff --git a/backends/cadence/hifi/third-party/nnlib/TARGETS b/backends/cadence/hifi/third-party/nnlib/TARGETS new file mode 100644 index 00000000000..67f2bab681a --- /dev/null +++ b/backends/cadence/hifi/third-party/nnlib/TARGETS @@ -0,0 +1,5 @@ +load("targets.bzl", 
"define_common_targets") + +oncall("odai_jarvis") + +define_common_targets() diff --git a/backends/cadence/hifi/third-party/nnlib/targets.bzl b/backends/cadence/hifi/third-party/nnlib/targets.bzl new file mode 100644 index 00000000000..615eacaa666 --- /dev/null +++ b/backends/cadence/hifi/third-party/nnlib/targets.bzl @@ -0,0 +1,18 @@ +# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. + +load("@fbsource//tools/build_defs:platform_defs.bzl", "CXX") +load("@fbsource//xplat/executorch/build:runtime_wrapper.bzl", "runtime") + +def define_common_targets(): + runtime.cxx_library( + name = "nnlib-extensions", + srcs = native.glob(["*.c", "*.cpp"]), + exported_headers = glob(["*.h"]), + visibility = [ + "//executorch/backends/cadence/...", + "@EXECUTORCH_CLIENTS", + ], + deps = [ + "fbsource//third-party/nnlib-hifi4/xa_nnlib:libxa_nnlib", + ], + ) diff --git a/backends/cadence/hifi/third-party/nnlib/xa_nn_elm_add_f32_broadcast.c b/backends/cadence/hifi/third-party/nnlib/xa_nn_elm_add_f32_broadcast.c index 9eab22b05b7..2a18d57e99f 100644 --- a/backends/cadence/hifi/third-party/nnlib/xa_nn_elm_add_f32_broadcast.c +++ b/backends/cadence/hifi/third-party/nnlib/xa_nn_elm_add_f32_broadcast.c @@ -25,7 +25,6 @@ #include "xa_nnlib_err_chk.h" #include "xa_nnlib_kernels_api.h" - #if HAVE_VFPU static void internal_elm_add_broadcast_2D_f32xf32_f32(FLOAT32 * __restrict__ p_out, const FLOAT32 * __restrict__ p_inp1, @@ -425,4 +424,3 @@ WORD32 xa_nn_elm_add_broadcast_4D_f32xf32_f32(FLOAT32 * __restrict__ p_out, return 0; } - diff --git a/backends/cadence/hifi/third-party/nnlib/xa_nn_elm_mul_f32_broadcast.c b/backends/cadence/hifi/third-party/nnlib/xa_nn_elm_mul_f32_broadcast.c index b9aa102a15f..e11fccbba52 100644 --- a/backends/cadence/hifi/third-party/nnlib/xa_nn_elm_mul_f32_broadcast.c +++ b/backends/cadence/hifi/third-party/nnlib/xa_nn_elm_mul_f32_broadcast.c @@ -20,11 +20,10 @@ ******************************************************************************/ #include "xa_type_def.h" -#include "nnlib-hifi4/xa_nnlib/algo/common/include/xa_nnlib_common_fpu.h" -#include "nnlib-hifi4/xa_nnlib/algo/common/include/xa_nn_common.h" -#include "nnlib-hifi4/xa_nnlib/algo/common/include/xa_nnlib_err_chk.h" -#include "nnlib-hifi4/xa_nnlib/algo/kernels/basic/hifi4/xa_nn_basic_state.h" -#include "nnlib-hifi4/xa_nnlib/include/nnlib/xa_nnlib_kernels_api.h" +#include "xa_nnlib_common_fpu.h" +#include "xa_nn_common.h" +#include "xa_nnlib_err_chk.h" +#include "xa_nnlib_kernels_api.h" #if HAVE_VFPU static void internal_elm_mul_broadcast_2D_f32xf32_f32(FLOAT32 * __restrict__ p_out, diff --git a/backends/cadence/hifi/third-party/nnlib/xa_nn_elm_where_f32xf32_f32.c b/backends/cadence/hifi/third-party/nnlib/xa_nn_elm_where_f32xf32_f32.c index 6a7f6d0f77d..426d60b0742 100644 --- a/backends/cadence/hifi/third-party/nnlib/xa_nn_elm_where_f32xf32_f32.c +++ b/backends/cadence/hifi/third-party/nnlib/xa_nn_elm_where_f32xf32_f32.c @@ -20,10 +20,10 @@ ******************************************************************************/ #include "xa_type_def.h" -#include "nnlib-hifi4/xa_nnlib/algo/common/include/xa_nnlib_common_fpu.h" -#include "nnlib-hifi4/xa_nnlib/algo/common/include/xa_nn_common.h" -#include "nnlib-hifi4/xa_nnlib/algo/common/include/xa_nnlib_err_chk.h" -#include "nnlib-hifi4/xa_nnlib/algo/kernels/basic/hifi4/xa_nn_basic_state.h" +#include "xa_nnlib_common_fpu.h" +#include "xa_nn_common.h" +#include "xa_nnlib_err_chk.h" +// #include "xa_nn_basic_state.h" #include "xa_nnlib_kernels_api.h" @@ -835,4 +835,4 @@ 
 
   return 0;
 }
-#endif
\ No newline at end of file
+#endif
diff --git a/backends/cadence/hifi/third-party/nnlib/xa_nn_reduce_32_32.c b/backends/cadence/hifi/third-party/nnlib/xa_nn_reduce_32_32.c
index 5978a92d269..fcc89260be4 100644
--- a/backends/cadence/hifi/third-party/nnlib/xa_nn_reduce_32_32.c
+++ b/backends/cadence/hifi/third-party/nnlib/xa_nn_reduce_32_32.c
@@ -428,7 +428,7 @@ static inline void xa_nn_reduce_sum_4D_f32_f32(const FLOAT32 * __restrict__ p_in
           p_wsrc2 = (xtfloatx2 *)(p_scr_in + (itr_n * plane_size) + (itr_h * wc_plane_size) + (itr_w * temp_inp_c));
           p_dst = (xtfloatx2 *)(p_scratch + (itr_n * hw_plane_size) + (itr_h * temp_inp_w) + itr_w);
           align_src = AE_LA64_PP(p_wsrc2);
-          xtfloatx2 i1 = AE_MOVXTFLOATX2_FROMF32X2(AE_MOVDA32(0));
+          xtfloatx2 i1 = XT_AE_MOVXTFLOATX2_FROMF32X2(AE_MOVDA32(0));
           for(itr_c = 0; itr_c < (temp_inp_c >> 2); itr_c++)
           {
             xtfloatx2 j1, j2;
diff --git a/kernels/portable/cpu/util/targets.bzl b/kernels/portable/cpu/util/targets.bzl
index eb4873d1d17..1dc36afce23 100644
--- a/kernels/portable/cpu/util/targets.bzl
+++ b/kernels/portable/cpu/util/targets.bzl
@@ -88,7 +88,7 @@ def define_common_targets():
         deps = [
             "//executorch/runtime/kernel:kernel_includes",
         ],
-        visibility = ["//executorch/kernels/portable/cpu/...", "//executorch/kernels/optimized/cpu/..."],
+        visibility = ["//executorch/kernels/portable/cpu/...", "//executorch/kernels/optimized/cpu/...", "@EXECUTORCH_CLIENTS"],
     )
 
     runtime.cxx_library(
@@ -103,7 +103,7 @@ def define_common_targets():
             "//executorch/kernels/portable/cpu:scalar_utils",
             "//executorch/runtime/kernel:kernel_includes",
         ],
-        visibility = ["//executorch/kernels/portable/cpu/...", "//executorch/kernels/optimized/cpu/..."],
+        visibility = ["//executorch/kernels/portable/cpu/...", "//executorch/kernels/optimized/cpu/...", "@EXECUTORCH_CLIENTS"],
     )
 
     runtime.cxx_library(