8 changes: 4 additions & 4 deletions backends/cadence/aot/functions_hifi.yaml
@@ -107,21 +107,21 @@
   variants: function
   kernels:
     - arg_meta: null
-      kernel_name: impl::HiFi::quantize_per_tensor_out
+      kernel_name: cadence::impl::HiFi::quantize_per_tensor_out
 
 - func: cadence::dequantize_per_tensor.out(Tensor input, float scale, int zero_point, int quant_min, int quant_max, ScalarType dtype, *, Tensor(a!) out) -> Tensor(a!)
   variants: function
   kernels:
     - arg_meta: null
-      kernel_name: impl::HiFi::dequantize_per_tensor_out
+      kernel_name: cadence::impl::HiFi::dequantize_per_tensor_out
 
 
 - func: cadence::quantized_layer_norm.out(Tensor input, Tensor in_scale, Tensor in_zero_point, int[] normalized_shape, Tensor weight, Tensor bias, float eps, float output_scale, int output_zero_point, *, Tensor(a!) out) -> Tensor(a!)
   kernels:
     - arg_meta: null
-      kernel_name: impl::HiFi::quantized_layer_norm_out
+      kernel_name: cadence::impl::HiFi::quantized_layer_norm_out
 
 - func: cadence::quantized_linear.out(Tensor src, Tensor weight, Tensor bias, int src_zero_point, Tensor weight_zero_point, Tensor out_multiplier, Tensor out_shift, int out_zero_point, Tensor? offset, *, Tensor(a!) out) -> Tensor(a!)
   kernels:
     - arg_meta: null
-      kernel_name: impl::HiFi::quantized_linear_out
+      kernel_name: cadence::impl::HiFi::quantized_linear_out
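The yaml change above mirrors the source changes below: every HiFi kernel now lives under a top-level `cadence` namespace, so the `kernel_name` entries must carry the leading `cadence::` qualifier. A minimal standalone illustration of why the old unqualified spelling stops resolving once the wrapper namespace is added (the function here is an empty stand-in, not the real kernel):

```cpp
namespace cadence {
namespace impl {
namespace HiFi {
void quantize_per_tensor_out() {}  // empty stand-in for the real kernel
} // namespace HiFi
} // namespace impl
} // namespace cadence

int main() {
  // impl::HiFi::quantize_per_tensor_out();  // error: `impl` is no longer
  //                                         // a top-level namespace
  cadence::impl::HiFi::quantize_per_tensor_out();  // OK: fully qualified
}
```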
2 changes: 2 additions & 0 deletions backends/cadence/hifi/kernels/kernels.cpp
@@ -10,6 +10,7 @@
 #include <xa_nnlib_common.h>
 #include <xa_nnlib_common_macros.h>
 
+namespace cadence {
 namespace impl {
 namespace HiFi {
 namespace kernels {
@@ -231,3 +232,4 @@ typed_requantize_vec(uint8_t, int8_t);
 }; // namespace kernels
 }; // namespace HiFi
 }; // namespace impl
+}; // namespace cadence
2 changes: 2 additions & 0 deletions backends/cadence/hifi/kernels/kernels.h
@@ -12,6 +12,7 @@
 #include <stddef.h>
 #include <xa_type_def.h>
 
+namespace cadence {
 namespace impl {
 namespace HiFi {
 namespace kernels {
@@ -63,3 +64,4 @@ void dequantize(
 }; // namespace kernels
 }; // namespace HiFi
 }; // namespace impl
+}; // namespace cadence
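For downstream callers of `kernels.h`, the practical effect is one extra namespace level. A minimal call-site sketch follows; the template declaration below is inferred from the call sites in this diff and may differ in detail from the real header:

```cpp
#include <cstddef>
#include <cstdint>

// Assumed shape of the helper declared in kernels.h, inferred from calls
// like quantize<uint8_t>(out_data, input_data, 1. / scale, zero_point, numel).
namespace cadence {
namespace impl {
namespace HiFi {
namespace kernels {
template <typename T>
void quantize(
    T* out, const float* in, float inv_scale, int32_t zero_point, size_t n);
} // namespace kernels
} // namespace HiFi
} // namespace impl
} // namespace cadence

void quantize_buffer(const float* in, int8_t* out, size_t n) {
  const float scale = 0.05f;     // illustrative values
  const int32_t zero_point = 3;
  // Call sites now spell out the leading cadence:: qualifier.
  cadence::impl::HiFi::kernels::quantize<int8_t>(
      out, in, 1.f / scale, zero_point, n);
}
```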
2 changes: 2 additions & 0 deletions backends/cadence/hifi/operators/dequantize_per_tensor.cpp
@@ -10,6 +10,7 @@
 #include <executorch/runtime/kernel/kernel_includes.h>
 #include <xa_nnlib_kernels_api.h>
 
+namespace cadence {
 namespace impl {
 namespace HiFi {
 namespace native {
@@ -50,3 +51,4 @@ void dequantize_per_tensor_out(
 }; // namespace native
 }; // namespace HiFi
 }; // namespace impl
+}; // namespace cadence
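As a reference for what `dequantize_per_tensor_out` computes per element: affine dequantization maps an integer `q` back to `(q - zero_point) * scale`. A scalar sketch (illustrative only; the HiFi build dispatches to vectorized nnlib routines):

```cpp
#include <cstdint>

// Reference affine dequantization; T is the quantized storage type
// (e.g. int8_t or uint8_t).
template <typename T>
float dequantize_ref(T q, float scale, int32_t zero_point) {
  return (static_cast<float>(q) - static_cast<float>(zero_point)) * scale;
}
```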
18 changes: 12 additions & 6 deletions backends/cadence/hifi/operators/quantize_per_tensor.cpp
@@ -10,6 +10,7 @@
 #include <executorch/runtime/kernel/kernel_includes.h>
 #include <xa_nnlib_kernels_api.h>
 
+namespace cadence {
 namespace impl {
 namespace HiFi {
 namespace native {
@@ -21,28 +22,32 @@ using executorch::runtime::KernelRuntimeContext;
 // Quantize the input tensor (PT2 version). Note that quant_<min,max> are not
 // used in any computation.
 void quantize_per_tensor_out(
-    KernelRuntimeContext& context,
+    KernelRuntimeContext& ctx,
     const Tensor& input,
     double scale,
     int64_t zero_point,
-    int64_t quant_min,
-    int64_t quant_max,
+    __ET_UNUSED int64_t quant_min,
+    __ET_UNUSED int64_t quant_max,
     ScalarType dtype,
     Tensor& out) {
   const float* input_data = input.const_data_ptr<float>();
-  size_t numel = out.numel();
+  const size_t numel = out.numel();
 
   if (out.scalar_type() == ScalarType::Byte) {
     uint8_t* out_data = out.mutable_data_ptr<uint8_t>();
-    impl::HiFi::kernels::quantize<uint8_t>(
+    cadence::impl::HiFi::kernels::quantize<uint8_t>(
         out_data, input_data, 1. / scale, zero_point, numel);
   } else if (out.scalar_type() == ScalarType::Char) {
     int8_t* out_data = out.mutable_data_ptr<int8_t>();
     xa_nn_elm_quantize_f32_asym8s(
         out_data, input_data, scale, zero_point, numel);
+  } else if (out.scalar_type() == ScalarType::Short) {
+    int16_t* out_data = out.mutable_data_ptr<int16_t>();
+    cadence::impl::HiFi::kernels::quantize<int16_t>(
+        out_data, input_data, 1. / scale, zero_point, numel);
   } else if (out.scalar_type() == ScalarType::Int) {
     int32_t* out_data = out.mutable_data_ptr<int32_t>();
-    impl::HiFi::kernels::quantize<int32_t>(
+    cadence::impl::HiFi::kernels::quantize<int32_t>(
         out_data, input_data, 1. / scale, zero_point, numel);
   } else {
     ET_CHECK_MSG(false, "Unhandled input dtype %hhd", out.scalar_type());
@@ -52,3 +57,4 @@ void quantize_per_tensor_out(
 }; // namespace native
 }; // namespace HiFi
 }; // namespace impl
+}; // namespace cadence
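For reference, each branch above performs the affine mapping `q ≈ round(x / scale) + zero_point`, saturated to the range of the output dtype (consistent with the comment that `quant_min`/`quant_max` are unused). A scalar sketch, not the nnlib code, and the exact rounding mode of the vectorized kernels is an assumption here:

```cpp
#include <cmath>
#include <cstdint>
#include <limits>

// Reference affine quantization; inv_scale is 1 / scale, matching how the
// kernels::quantize call sites above pass it.
template <typename T>
T quantize_ref(float x, float inv_scale, int32_t zero_point) {
  float q = std::nearbyint(x * inv_scale) + static_cast<float>(zero_point);
  q = std::fmax(q, static_cast<float>(std::numeric_limits<T>::min()));
  q = std::fmin(q, static_cast<float>(std::numeric_limits<T>::max()));
  return static_cast<T>(q);
}
```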
6 changes: 4 additions & 2 deletions backends/cadence/hifi/operators/quantized_layer_norm.cpp
@@ -16,6 +16,7 @@ using executorch::aten::Tensor;
 using executorch::runtime::getLeadingDims;
 using executorch::runtime::KernelRuntimeContext;
 
+namespace cadence {
 namespace impl {
 namespace HiFi {
 namespace native {
@@ -76,10 +77,10 @@ void quantized_layer_norm_(
     for (size_t j = 0; j < last_dim; ++j) {
       // Since X is quantized, we dequantize it, compute fp32 result, and
       // quantize the result to an int8/uint8 value.
-      float val = impl::HiFi::kernels::dequantize<T>(
+      float val = cadence::impl::HiFi::kernels::dequantize<T>(
           x[j], input_scale, input_zero_point);
       val = (val - mean) * inv_std * weight_data[j] + bias_data[j];
-      y[j] = impl::HiFi::kernels::quantize<T>(
+      y[j] = cadence::impl::HiFi::kernels::quantize<T>(
           val, output_inv_scale, output_zero_point);
     }
   }
@@ -157,3 +158,4 @@ void quantized_layer_norm_out(
 }; // namespace native
 }; // namespace HiFi
 }; // namespace impl
+}; // namespace cadence
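The loop in the hunk above is, per element, dequantize → normalize → requantize. A scalar sketch of that math (illustrative; `mean` and `inv_std` are per-row statistics the kernel computes before this loop, and the real code routes through the `kernels::quantize`/`dequantize` helpers):

```cpp
#include <cmath>
#include <cstdint>

// Scalar form of the loop body, specialized to int8 for illustration.
inline int8_t layer_norm_elem(
    int8_t x_q, float input_scale, int32_t input_zero_point,
    float mean, float inv_std, float weight, float bias,
    float output_inv_scale, int32_t output_zero_point) {
  // Dequantize, apply the affine layer-norm transform, then requantize.
  const float x = (static_cast<float>(x_q) - input_zero_point) * input_scale;
  const float y = (x - mean) * inv_std * weight + bias;
  const float q = std::nearbyint(y * output_inv_scale) + output_zero_point;
  return static_cast<int8_t>(std::fmin(127.f, std::fmax(-128.f, q)));
}
```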
4 changes: 3 additions & 1 deletion backends/cadence/hifi/operators/quantized_linear_out.cpp
@@ -11,6 +11,7 @@
 #include <algorithm>
 #include <cmath>
 
+namespace cadence {
 namespace impl {
 namespace HiFi {
 namespace native {
@@ -45,7 +46,7 @@ void quantized_linear_out(
   uint8_t* __restrict__ out_data = out.mutable_data_ptr<uint8_t>();
 
   // The nnlib kernel to compute quantized linear via matmul.
-  int32_t ret = impl::HiFi::kernels::matmul_asym8uxasym8u_asym8u(
+  int32_t ret = cadence::impl::HiFi::kernels::matmul_asym8uxasym8u_asym8u(
       out_data, // p_out
       weight_data, // p_mat1,
       in_data, // p_mat2,
@@ -69,3 +70,4 @@ void quantized_linear_out(
 }; // namespace native
 }; // namespace HiFi
 }; // namespace impl
+}; // namespace cadence
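Conceptually, the matmul call above computes, for each output element, a zero-point-corrected dot product plus bias, requantized to uint8. A scalar reference (illustrative only; the nnlib kernel vectorizes this, and the real requantization uses the fixed-point `out_multiplier`/`out_shift` pair from the op signature rather than the single float scale assumed here):

```cpp
#include <algorithm>
#include <cmath>
#include <cstdint>

// Per-output-element reference for quantized linear.
uint8_t quantized_linear_elem_ref(
    const uint8_t* src_row, const uint8_t* weight_row, int32_t bias,
    int64_t k, int32_t src_zero_point, int32_t weight_zero_point,
    float out_scale, int32_t out_zero_point) {
  int32_t acc = bias;
  for (int64_t i = 0; i < k; ++i) {
    acc += (static_cast<int32_t>(weight_row[i]) - weight_zero_point) *
           (static_cast<int32_t>(src_row[i]) - src_zero_point);
  }
  const float scaled =
      std::nearbyint(static_cast<float>(acc) * out_scale) + out_zero_point;
  const int32_t clamped =
      std::min(255, std::max(0, static_cast<int32_t>(scaled)));
  return static_cast<uint8_t>(clamped);
}
```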
1 change: 1 addition & 0 deletions backends/cadence/hifi/operators/targets.bzl
@@ -26,5 +26,6 @@ def define_common_targets():
         ],
         visibility = [
             "//executorch/backends/cadence/...",
+            "@EXECUTORCH_CLIENTS",
         ],
     )
@@ -43,6 +43,7 @@
 
 /*----------------------------Main function---------------------------------*/
 
+namespace cadence {
 namespace impl {
 namespace HiFi {
 namespace kernels {
@@ -436,3 +437,4 @@ WORD32 matmul_asym8uxasym8u_asym8u(
 }; // namespace kernels
 }; // namespace HiFi
 }; // namespace impl
+}; // namespace cadence