8 changes: 4 additions & 4 deletions backends/cadence/aot/functions_hifi.yaml
@@ -107,21 +107,21 @@
   variants: function
   kernels:
     - arg_meta: null
-      kernel_name: impl::HiFi::quantize_per_tensor_out
+      kernel_name: cadence::impl::HiFi::quantize_per_tensor_out
 
 - func: cadence::dequantize_per_tensor.out(Tensor input, float scale, int zero_point, int quant_min, int quant_max, ScalarType dtype, *, Tensor(a!) out) -> Tensor(a!)
   variants: function
   kernels:
     - arg_meta: null
-      kernel_name: impl::HiFi::dequantize_per_tensor_out
+      kernel_name: cadence::impl::HiFi::dequantize_per_tensor_out
 
 
 - func: cadence::quantized_layer_norm.out(Tensor input, Tensor in_scale, Tensor in_zero_point, int[] normalized_shape, Tensor weight, Tensor bias, float eps, float output_scale, int output_zero_point, *, Tensor(a!) out) -> Tensor(a!)
   kernels:
     - arg_meta: null
-      kernel_name: impl::HiFi::quantized_layer_norm_out
+      kernel_name: cadence::impl::HiFi::quantized_layer_norm_out
 
 - func: cadence::quantized_linear.out(Tensor src, Tensor weight, Tensor bias, int src_zero_point, Tensor weight_zero_point, Tensor out_multiplier, Tensor out_shift, int out_zero_point, Tensor? offset, *, Tensor(a!) out) -> Tensor(a!)
   kernels:
     - arg_meta: null
-      kernel_name: impl::HiFi::quantized_linear_out
+      kernel_name: cadence::impl::HiFi::quantized_linear_out
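The yaml change above mirrors the source changes below: every HiFi kernel now lives under a top-level `cadence` namespace, so the `kernel_name` entries must carry the leading `cadence::` qualifier. A minimal standalone illustration of why the old unqualified spelling stops resolving once the wrapper namespace is added (the function here is an empty stand-in, not the real kernel):

```cpp
namespace cadence {
namespace impl {
namespace HiFi {
void quantize_per_tensor_out() {}  // empty stand-in for the real kernel
} // namespace HiFi
} // namespace impl
} // namespace cadence

int main() {
  // impl::HiFi::quantize_per_tensor_out();  // error: `impl` is no longer
  //                                         // a top-level namespace
  cadence::impl::HiFi::quantize_per_tensor_out();  // OK: fully qualified
}
```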
2 changes: 2 additions & 0 deletions backends/cadence/hifi/kernels/kernels.cpp
@@ -10,6 +10,7 @@
 #include <xa_nnlib_common.h>
 #include <xa_nnlib_common_macros.h>
 
+namespace cadence {
 namespace impl {
 namespace HiFi {
 namespace kernels {
@@ -231,3 +232,4 @@ typed_requantize_vec(uint8_t, int8_t);
 }; // namespace kernels
 }; // namespace HiFi
 }; // namespace impl
+}; // namespace cadence
2 changes: 2 additions & 0 deletions backends/cadence/hifi/kernels/kernels.h
@@ -12,6 +12,7 @@
 #include <stddef.h>
 #include <xa_type_def.h>
 
+namespace cadence {
 namespace impl {
 namespace HiFi {
 namespace kernels {
@@ -63,3 +64,4 @@ void dequantize(
 }; // namespace kernels
 }; // namespace HiFi
 }; // namespace impl
+}; // namespace cadence
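For downstream callers of `kernels.h`, the practical effect is one extra namespace level. A minimal call-site sketch follows; the template declaration below is inferred from the call sites in this diff and may differ in detail from the real header:

```cpp
#include <cstddef>
#include <cstdint>

// Assumed shape of the helper declared in kernels.h, inferred from calls
// like quantize<uint8_t>(out_data, input_data, 1. / scale, zero_point, numel).
namespace cadence {
namespace impl {
namespace HiFi {
namespace kernels {
template <typename T>
void quantize(
    T* out, const float* in, float inv_scale, int32_t zero_point, size_t n);
} // namespace kernels
} // namespace HiFi
} // namespace impl
} // namespace cadence

void quantize_buffer(const float* in, int8_t* out, size_t n) {
  const float scale = 0.05f;     // illustrative values
  const int32_t zero_point = 3;
  // Call sites now spell out the leading cadence:: qualifier.
  cadence::impl::HiFi::kernels::quantize<int8_t>(
      out, in, 1.f / scale, zero_point, n);
}
```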
2 changes: 2 additions & 0 deletions backends/cadence/hifi/operators/dequantize_per_tensor.cpp
@@ -10,6 +10,7 @@
 #include <executorch/runtime/kernel/kernel_includes.h>
 #include <xa_nnlib_kernels_api.h>
 
+namespace cadence {
 namespace impl {
 namespace HiFi {
 namespace native {
@@ -50,3 +51,4 @@ void dequantize_per_tensor_out(
 }; // namespace native
 }; // namespace HiFi
 }; // namespace impl
+}; // namespace cadence
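As a reference for what `dequantize_per_tensor_out` computes per element: affine dequantization maps an integer `q` back to `(q - zero_point) * scale`. A scalar sketch (illustrative only; the HiFi build dispatches to vectorized nnlib routines):

```cpp
#include <cstdint>

// Reference affine dequantization; T is the quantized storage type
// (e.g. int8_t or uint8_t).
template <typename T>
float dequantize_ref(T q, float scale, int32_t zero_point) {
  return (static_cast<float>(q) - static_cast<float>(zero_point)) * scale;
}
```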
18 changes: 12 additions & 6 deletions backends/cadence/hifi/operators/quantize_per_tensor.cpp
@@ -10,6 +10,7 @@
 #include <executorch/runtime/kernel/kernel_includes.h>
 #include <xa_nnlib_kernels_api.h>
 
+namespace cadence {
 namespace impl {
 namespace HiFi {
 namespace native {
@@ -21,28 +22,32 @@ using executorch::runtime::KernelRuntimeContext;
 // Quantize the input tensor (PT2 version). Note that quant_<min,max> are not
 // used in any computation.
 void quantize_per_tensor_out(
-    KernelRuntimeContext& context,
+    KernelRuntimeContext& ctx,
     const Tensor& input,
     double scale,
     int64_t zero_point,
-    int64_t quant_min,
-    int64_t quant_max,
+    __ET_UNUSED int64_t quant_min,
+    __ET_UNUSED int64_t quant_max,
     ScalarType dtype,
     Tensor& out) {
   const float* input_data = input.const_data_ptr<float>();
-  size_t numel = out.numel();
+  const size_t numel = out.numel();
 
   if (out.scalar_type() == ScalarType::Byte) {
     uint8_t* out_data = out.mutable_data_ptr<uint8_t>();
-    impl::HiFi::kernels::quantize<uint8_t>(
+    cadence::impl::HiFi::kernels::quantize<uint8_t>(
         out_data, input_data, 1. / scale, zero_point, numel);
   } else if (out.scalar_type() == ScalarType::Char) {
     int8_t* out_data = out.mutable_data_ptr<int8_t>();
     xa_nn_elm_quantize_f32_asym8s(
         out_data, input_data, scale, zero_point, numel);
+  } else if (out.scalar_type() == ScalarType::Short) {
+    int16_t* out_data = out.mutable_data_ptr<int16_t>();
+    cadence::impl::HiFi::kernels::quantize<int16_t>(
+        out_data, input_data, 1. / scale, zero_point, numel);
   } else if (out.scalar_type() == ScalarType::Int) {
     int32_t* out_data = out.mutable_data_ptr<int32_t>();
-    impl::HiFi::kernels::quantize<int32_t>(
+    cadence::impl::HiFi::kernels::quantize<int32_t>(
         out_data, input_data, 1. / scale, zero_point, numel);
   } else {
     ET_CHECK_MSG(false, "Unhandled input dtype %hhd", out.scalar_type());
@@ -52,3 +57,4 @@ void quantize_per_tensor_out(
 }; // namespace native
 }; // namespace HiFi
 }; // namespace impl
+}; // namespace cadence
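For reference, each branch above performs the affine mapping `q ≈ round(x / scale) + zero_point`, saturated to the range of the output dtype (consistent with the comment that `quant_min`/`quant_max` are unused). A scalar sketch, not the nnlib code, and the exact rounding mode of the vectorized kernels is an assumption here:

```cpp
#include <cmath>
#include <cstdint>
#include <limits>

// Reference affine quantization; inv_scale is 1 / scale, matching how the
// kernels::quantize call sites above pass it.
template <typename T>
T quantize_ref(float x, float inv_scale, int32_t zero_point) {
  float q = std::nearbyint(x * inv_scale) + static_cast<float>(zero_point);
  q = std::fmax(q, static_cast<float>(std::numeric_limits<T>::min()));
  q = std::fmin(q, static_cast<float>(std::numeric_limits<T>::max()));
  return static_cast<T>(q);
}
```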
6 changes: 4 additions & 2 deletions backends/cadence/hifi/operators/quantized_layer_norm.cpp
@@ -16,6 +16,7 @@ using executorch::aten::Tensor;
 using executorch::runtime::getLeadingDims;
 using executorch::runtime::KernelRuntimeContext;
 
+namespace cadence {
 namespace impl {
 namespace HiFi {
 namespace native {
@@ -76,10 +77,10 @@ void quantized_layer_norm_(
     for (size_t j = 0; j < last_dim; ++j) {
       // Since X is quantized, we dequantize it, compute fp32 result, and
       // quantize the result to an int8/uint8 value.
-      float val = impl::HiFi::kernels::dequantize<T>(
+      float val = cadence::impl::HiFi::kernels::dequantize<T>(
           x[j], input_scale, input_zero_point);
       val = (val - mean) * inv_std * weight_data[j] + bias_data[j];
-      y[j] = impl::HiFi::kernels::quantize<T>(
+      y[j] = cadence::impl::HiFi::kernels::quantize<T>(
           val, output_inv_scale, output_zero_point);
     }
   }
@@ -157,3 +158,4 @@ void quantized_layer_norm_out(
 }; // namespace native
 }; // namespace HiFi
 }; // namespace impl
+}; // namespace cadence
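The loop in the hunk above is, per element, dequantize → normalize → requantize. A scalar sketch of that math (illustrative; `mean` and `inv_std` are per-row statistics the kernel computes before this loop, and the real code routes through the `kernels::quantize`/`dequantize` helpers):

```cpp
#include <cmath>
#include <cstdint>

// Scalar form of the loop body, specialized to int8 for illustration.
inline int8_t layer_norm_elem(
    int8_t x_q, float input_scale, int32_t input_zero_point,
    float mean, float inv_std, float weight, float bias,
    float output_inv_scale, int32_t output_zero_point) {
  // Dequantize, apply the affine layer-norm transform, then requantize.
  const float x = (static_cast<float>(x_q) - input_zero_point) * input_scale;
  const float y = (x - mean) * inv_std * weight + bias;
  const float q = std::nearbyint(y * output_inv_scale) + output_zero_point;
  return static_cast<int8_t>(std::fmin(127.f, std::fmax(-128.f, q)));
}
```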
4 changes: 3 additions & 1 deletion backends/cadence/hifi/operators/quantized_linear_out.cpp
@@ -11,6 +11,7 @@
 #include <algorithm>
 #include <cmath>
 
+namespace cadence {
 namespace impl {
 namespace HiFi {
 namespace native {
@@ -45,7 +46,7 @@ void quantized_linear_out(
   uint8_t* __restrict__ out_data = out.mutable_data_ptr<uint8_t>();
 
   // The nnlib kernel to compute quantized linear via matmul.
-  int32_t ret = impl::HiFi::kernels::matmul_asym8uxasym8u_asym8u(
+  int32_t ret = cadence::impl::HiFi::kernels::matmul_asym8uxasym8u_asym8u(
       out_data, // p_out
       weight_data, // p_mat1,
       in_data, // p_mat2,
@@ -69,3 +70,4 @@ void quantized_linear_out(
 }; // namespace native
 }; // namespace HiFi
 }; // namespace impl
+}; // namespace cadence
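Conceptually, the matmul call above computes, for each output element, a zero-point-corrected dot product plus bias, requantized to uint8. A scalar reference (illustrative only; the nnlib kernel vectorizes this, and the real requantization uses the fixed-point `out_multiplier`/`out_shift` pair from the op signature rather than the single float scale assumed here):

```cpp
#include <algorithm>
#include <cmath>
#include <cstdint>

// Per-output-element reference for quantized linear.
uint8_t quantized_linear_elem_ref(
    const uint8_t* src_row, const uint8_t* weight_row, int32_t bias,
    int64_t k, int32_t src_zero_point, int32_t weight_zero_point,
    float out_scale, int32_t out_zero_point) {
  int32_t acc = bias;
  for (int64_t i = 0; i < k; ++i) {
    acc += (static_cast<int32_t>(weight_row[i]) - weight_zero_point) *
           (static_cast<int32_t>(src_row[i]) - src_zero_point);
  }
  const float scaled =
      std::nearbyint(static_cast<float>(acc) * out_scale) + out_zero_point;
  const int32_t clamped =
      std::min(255, std::max(0, static_cast<int32_t>(scaled)));
  return static_cast<uint8_t>(clamped);
}
```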
1 change: 1 addition & 0 deletions backends/cadence/hifi/operators/targets.bzl
@@ -26,5 +26,6 @@ def define_common_targets():
         ],
         visibility = [
             "//executorch/backends/cadence/...",
+            "@EXECUTORCH_CLIENTS",
         ],
     )
@@ -43,6 +43,7 @@
 
 /*----------------------------Main function---------------------------------*/
 
+namespace cadence {
 namespace impl {
 namespace HiFi {
 namespace kernels {
@@ -436,3 +437,4 @@ WORD32 matmul_asym8uxasym8u_asym8u(
 }; // namespace kernels
 }; // namespace HiFi
 }; // namespace impl
+}; // namespace cadence