diff --git a/backends/cadence/aot/functions_hifi.yaml b/backends/cadence/aot/functions_hifi.yaml index 729db66850a..5c25d89946e 100644 --- a/backends/cadence/aot/functions_hifi.yaml +++ b/backends/cadence/aot/functions_hifi.yaml @@ -107,21 +107,21 @@ variants: function kernels: - arg_meta: null - kernel_name: impl::HiFi::quantize_per_tensor_out + kernel_name: cadence::impl::HiFi::quantize_per_tensor_out - func: cadence::dequantize_per_tensor.out(Tensor input, float scale, int zero_point, int quant_min, int quant_max, ScalarType dtype, *, Tensor(a!) out) -> Tensor(a!) variants: function kernels: - arg_meta: null - kernel_name: impl::HiFi::dequantize_per_tensor_out + kernel_name: cadence::impl::HiFi::dequantize_per_tensor_out - func: cadence::quantized_layer_norm.out(Tensor input, Tensor in_scale, Tensor in_zero_point, int[] normalized_shape, Tensor weight, Tensor bias, float eps, float output_scale, int output_zero_point, *, Tensor(a!) out) -> Tensor(a!) kernels: - arg_meta: null - kernel_name: impl::HiFi::quantized_layer_norm_out + kernel_name: cadence::impl::HiFi::quantized_layer_norm_out - func: cadence::quantized_linear.out(Tensor src, Tensor weight, Tensor bias, int src_zero_point, Tensor weight_zero_point, Tensor out_multiplier, Tensor out_shift, int out_zero_point, Tensor? offset, *, Tensor(a!) out) -> Tensor(a!) 
kernels: - arg_meta: null - kernel_name: impl::HiFi::quantized_linear_out + kernel_name: cadence::impl::HiFi::quantized_linear_out diff --git a/backends/cadence/hifi/kernels/kernels.cpp b/backends/cadence/hifi/kernels/kernels.cpp index 4d9183e4cc2..10e5fb176e0 100644 --- a/backends/cadence/hifi/kernels/kernels.cpp +++ b/backends/cadence/hifi/kernels/kernels.cpp @@ -10,6 +10,7 @@ #include #include +namespace cadence { namespace impl { namespace HiFi { namespace kernels { @@ -231,3 +232,4 @@ typed_requantize_vec(uint8_t, int8_t); }; // namespace kernels }; // namespace HiFi }; // namespace impl +}; // namespace cadence diff --git a/backends/cadence/hifi/kernels/kernels.h b/backends/cadence/hifi/kernels/kernels.h index b5659824615..d27e8051f52 100644 --- a/backends/cadence/hifi/kernels/kernels.h +++ b/backends/cadence/hifi/kernels/kernels.h @@ -12,6 +12,7 @@ #include #include +namespace cadence { namespace impl { namespace HiFi { namespace kernels { @@ -63,3 +64,4 @@ void dequantize( }; // namespace kernels }; // namespace HiFi }; // namespace impl +}; // namespace cadence diff --git a/backends/cadence/hifi/operators/dequantize_per_tensor.cpp b/backends/cadence/hifi/operators/dequantize_per_tensor.cpp index 935ff8a5015..2a548fb231c 100644 --- a/backends/cadence/hifi/operators/dequantize_per_tensor.cpp +++ b/backends/cadence/hifi/operators/dequantize_per_tensor.cpp @@ -10,6 +10,7 @@ #include #include +namespace cadence { namespace impl { namespace HiFi { namespace native { @@ -50,3 +51,4 @@ void dequantize_per_tensor_out( }; // namespace native }; // namespace HiFi }; // namespace impl +}; // namespace cadence diff --git a/backends/cadence/hifi/operators/quantize_per_tensor.cpp b/backends/cadence/hifi/operators/quantize_per_tensor.cpp index f17c865392a..9b2034973f3 100644 --- a/backends/cadence/hifi/operators/quantize_per_tensor.cpp +++ b/backends/cadence/hifi/operators/quantize_per_tensor.cpp @@ -10,6 +10,7 @@ #include #include +namespace cadence { namespace impl { 
namespace HiFi { namespace native { @@ -21,28 +22,32 @@ using executorch::runtime::KernelRuntimeContext; // Quantize the input tensor (PT2 version). Note that quant_<min,max> are not // used in any computation. void quantize_per_tensor_out( - KernelRuntimeContext& context, + KernelRuntimeContext& ctx, const Tensor& input, double scale, int64_t zero_point, - int64_t quant_min, - int64_t quant_max, + __ET_UNUSED int64_t quant_min, + __ET_UNUSED int64_t quant_max, ScalarType dtype, Tensor& out) { const float* input_data = input.const_data_ptr(); - size_t numel = out.numel(); + const size_t numel = out.numel(); if (out.scalar_type() == ScalarType::Byte) { uint8_t* out_data = out.mutable_data_ptr(); - impl::HiFi::kernels::quantize( + cadence::impl::HiFi::kernels::quantize( out_data, input_data, 1. / scale, zero_point, numel); } else if (out.scalar_type() == ScalarType::Char) { int8_t* out_data = out.mutable_data_ptr(); xa_nn_elm_quantize_f32_asym8s( out_data, input_data, scale, zero_point, numel); + } else if (out.scalar_type() == ScalarType::Short) { + int16_t* out_data = out.mutable_data_ptr(); + cadence::impl::HiFi::kernels::quantize( + out_data, input_data, 1. / scale, zero_point, numel); } else if (out.scalar_type() == ScalarType::Int) { int32_t* out_data = out.mutable_data_ptr(); - impl::HiFi::kernels::quantize( + cadence::impl::HiFi::kernels::quantize( out_data, input_data, 1. 
/ scale, zero_point, numel); } else { ET_CHECK_MSG(false, "Unhandled input dtype %hhd", out.scalar_type()); @@ -52,3 +57,4 @@ void quantize_per_tensor_out( }; // namespace native }; // namespace HiFi }; // namespace impl +}; // namespace cadence diff --git a/backends/cadence/hifi/operators/quantized_layer_norm.cpp b/backends/cadence/hifi/operators/quantized_layer_norm.cpp index 62298bff092..439bb594f50 100644 --- a/backends/cadence/hifi/operators/quantized_layer_norm.cpp +++ b/backends/cadence/hifi/operators/quantized_layer_norm.cpp @@ -16,6 +16,7 @@ using executorch::aten::Tensor; using executorch::runtime::getLeadingDims; using executorch::runtime::KernelRuntimeContext; +namespace cadence { namespace impl { namespace HiFi { namespace native { @@ -76,10 +77,10 @@ void quantized_layer_norm_( for (size_t j = 0; j < last_dim; ++j) { // Since X is quantized, we dequantize it, compute fp32 result, and // quantize the result to an int8/uint8 value. - float val = impl::HiFi::kernels::dequantize( + float val = cadence::impl::HiFi::kernels::dequantize( x[j], input_scale, input_zero_point); val = (val - mean) * inv_std * weight_data[j] + bias_data[j]; - y[j] = impl::HiFi::kernels::quantize( + y[j] = cadence::impl::HiFi::kernels::quantize( val, output_inv_scale, output_zero_point); } } @@ -157,3 +158,4 @@ void quantized_layer_norm_out( }; // namespace native }; // namespace HiFi }; // namespace impl +}; // namespace cadence diff --git a/backends/cadence/hifi/operators/quantized_linear_out.cpp b/backends/cadence/hifi/operators/quantized_linear_out.cpp index 8a0fa5d4203..8944a24ddbc 100644 --- a/backends/cadence/hifi/operators/quantized_linear_out.cpp +++ b/backends/cadence/hifi/operators/quantized_linear_out.cpp @@ -11,6 +11,7 @@ #include #include +namespace cadence { namespace impl { namespace HiFi { namespace native { @@ -45,7 +46,7 @@ void quantized_linear_out( uint8_t* __restrict__ out_data = out.mutable_data_ptr(); // The nnlib kernel to compute quantized linear via 
matmul. - int32_t ret = impl::HiFi::kernels::matmul_asym8uxasym8u_asym8u( + int32_t ret = cadence::impl::HiFi::kernels::matmul_asym8uxasym8u_asym8u( out_data, // p_out weight_data, // p_mat1, in_data, // p_mat2, @@ -69,3 +70,4 @@ void quantized_linear_out( }; // namespace native }; // namespace HiFi }; // namespace impl +}; // namespace cadence diff --git a/backends/cadence/hifi/operators/targets.bzl b/backends/cadence/hifi/operators/targets.bzl index c7b24d790f0..a2556476a1a 100644 --- a/backends/cadence/hifi/operators/targets.bzl +++ b/backends/cadence/hifi/operators/targets.bzl @@ -26,5 +26,6 @@ def define_common_targets(): ], visibility = [ "//executorch/backends/cadence/...", + "@EXECUTORCH_CLIENTS", ], ) diff --git a/backends/cadence/hifi/third-party/nnlib/matmul_asym8uxasym8u_asym8u.cpp b/backends/cadence/hifi/third-party/nnlib/matmul_asym8uxasym8u_asym8u.cpp index 0c19e1ae593..fb944a66431 100644 --- a/backends/cadence/hifi/third-party/nnlib/matmul_asym8uxasym8u_asym8u.cpp +++ b/backends/cadence/hifi/third-party/nnlib/matmul_asym8uxasym8u_asym8u.cpp @@ -43,6 +43,7 @@ /*----------------------------Main function---------------------------------*/ +namespace cadence { namespace impl { namespace HiFi { namespace kernels { @@ -436,3 +437,4 @@ WORD32 matmul_asym8uxasym8u_asym8u( }; // namespace kernels }; // namespace HiFi }; // namespace impl +}; // namespace cadence