File tree Expand file tree Collapse file tree 4 files changed +23
-11
lines changed Expand file tree Collapse file tree 4 files changed +23
-11
lines changed Original file line number Diff line number Diff line change @@ -16,6 +16,7 @@ def define_common_targets():
16
16
compatible_with = ["ovr_config//cpu:xtensa" ],
17
17
visibility = [
18
18
"//executorch/backends/cadence/..." ,
19
+ "@EXECUTORCH_CLIENTS" ,
19
20
],
20
21
exported_deps = [
21
22
"fbsource//third-party/nnlib-hifi4/xa_nnlib:libxa_nnlib_common" ,
Original file line number Diff line number Diff line change 12
12
13
13
#define ALIGN_PTR(x, bytes) ((((unsigned)(x)) + (bytes - 1)) & (~(bytes - 1)))
14
14
15
- using Tensor = executorch::aten::Tensor;
16
- using KernelRuntimeContext = torch::executor::KernelRuntimeContext;
17
- using ScalarType = executorch::aten::ScalarType;
18
- using ::executorch::aten::IntArrayRef;
19
-
20
15
namespace cadence {
21
16
namespace impl {
22
17
namespace HiFi {
23
18
namespace native {
24
19
20
+ using ::cadence::impl::HiFi::kernels::quantize;
21
+ using ::executorch::aten::IntArrayRef;
22
+ using ::executorch::aten::ScalarType;
23
+ using ::executorch::aten::Tensor;
24
+ using ::torch::executor::KernelRuntimeContext;
25
+
25
26
// This implements a generic 2d conv kernel that operates on raw pointers.
26
27
// The version handles both quantized and fp32 convolutions.
27
28
// The input is of shape [n x c x h x w]
@@ -145,7 +146,7 @@ __attribute__((noinline)) void conv2d_nchw_core_generic(
145
146
if (quantized) {
146
147
float val = bias_scale * acc;
147
148
out_plane[_oh * ow + _ow] =
148
- kernels:: quantize<OT>(val, inv_out_scale, out_zero_point);
149
+ quantize<OT>(val, inv_out_scale, out_zero_point);
149
150
} else {
150
151
out_plane[_oh * ow + _ow] = acc;
151
152
}
Original file line number Diff line number Diff line change 7
7
*/
8
8
9
9
#include <executorch/backends/cadence/reference/kernels/kernels.h>
10
- #include <math.h>
11
10
#include <algorithm>
11
+ #include <cmath>
12
12
#include <cstring>
13
13
#include <limits>
14
+
14
15
namespace impl {
15
16
namespace reference {
16
17
namespace kernels {
17
18
18
19
// Quantize a fp32 value to an int8_t/uint8_t value
19
20
template <typename T>
20
21
T quantize (const float x, float scale, int32_t zero_point) {
21
- constexpr float min_val = std::numeric_limits<T>::min ();
22
- constexpr float max_val = std::numeric_limits<T>::max ();
23
- float tmp = roundf (x * scale + zero_point);
24
- return std::max (std::min (tmp, max_val), min_val);
22
+ // constexpr float min_val = std::numeric_limits<T>::min();
23
+ // constexpr float max_val = std::numeric_limits<T>::max();
24
+ // float tmp = roundf(x * scale + zero_point);
25
+ // return std::max(std::min(tmp, max_val), min_val);
26
+ // Match Executorch CPU kernel implementation at
27
+ // https://fburl.com/code/fxizw6u6
28
+ int64_t qvalue;
29
+ qvalue = static_cast <int64_t >(zero_point + std::nearbyint (scale * x));
30
+
31
+ qvalue = std::max<int64_t >(qvalue, std::numeric_limits<T>::min ());
32
+ qvalue = std::min<int64_t >(qvalue, std::numeric_limits<T>::max ());
33
+ return static_cast <T>(qvalue);
25
34
}
26
35
27
36
// Quantize an fp32 array to an int8_t/uint8_t array
Original file line number Diff line number Diff line change @@ -10,6 +10,7 @@ def define_common_targets():
10
10
],
11
11
visibility = [
12
12
"//executorch/backends/cadence/..." ,
13
+ "@EXECUTORCH_CLIENTS" ,
13
14
],
14
15
platforms = CXX ,
15
16
)
You can’t perform that action at this time.
0 commit comments