Commit 705150c

Use OSS kernels everywhere

Differential Revision: D81203389
Pull Request resolved: #13884

1 parent fa4d935

File tree

4 files changed: +23 lines, −11 lines


backends/cadence/hifi/kernels/targets.bzl

Lines changed: 1 addition & 0 deletions

@@ -16,6 +16,7 @@ def define_common_targets():
         compatible_with = ["ovr_config//cpu:xtensa"],
         visibility = [
             "//executorch/backends/cadence/...",
+            "@EXECUTORCH_CLIENTS",
         ],
         exported_deps = [
             "fbsource//third-party/nnlib-hifi4/xa_nnlib:libxa_nnlib_common",

backends/cadence/hifi/operators/op_quantized_conv_nchw_out.cpp

Lines changed: 7 additions & 6 deletions

@@ -12,16 +12,17 @@

 #define ALIGN_PTR(x, bytes) ((((unsigned)(x)) + (bytes - 1)) & (~(bytes - 1)))

-using Tensor = executorch::aten::Tensor;
-using KernelRuntimeContext = torch::executor::KernelRuntimeContext;
-using ScalarType = executorch::aten::ScalarType;
-using ::executorch::aten::IntArrayRef;
-
 namespace cadence {
 namespace impl {
 namespace HiFi {
 namespace native {

+using ::cadence::impl::HiFi::kernels::quantize;
+using ::executorch::aten::IntArrayRef;
+using ::executorch::aten::ScalarType;
+using ::executorch::aten::Tensor;
+using ::torch::executor::KernelRuntimeContext;
+
 // This implements a generic 2d conv kernel that operates on raw pointers.
 // The version handles both quantized and fp32 convolutions.
 // The input is of shape [n x c x h x w]
@@ -145,7 +146,7 @@ __attribute__((noinline)) void conv2d_nchw_core_generic(
       if (quantized) {
         float val = bias_scale * acc;
         out_plane[_oh * ow + _ow] =
-            kernels::quantize<OT>(val, inv_out_scale, out_zero_point);
+            quantize<OT>(val, inv_out_scale, out_zero_point);
       } else {
         out_plane[_oh * ow + _ow] = acc;
       }
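The substantive change in this file is just the call-site spelling: with the using-declarations moved from file scope into cadence::impl::HiFi::native, the unqualified name quantize resolves to ::cadence::impl::HiFi::kernels::quantize. A minimal sketch of that lookup behavior follows; the names and bodies are stand-ins, not the commit's real kernel signatures.

// sketch.cpp -- name lookup through a namespace-scoped using-declaration.
namespace kernels {
template <typename T>
T quantize(float v) {
  return static_cast<T>(v); // placeholder body, not the real kernel
}
} // namespace kernels

namespace native {
using ::kernels::quantize; // same pattern as the commit

inline int call_site() {
  // Resolves to ::kernels::quantize<int>; no kernels:: qualifier needed.
  return quantize<int>(3.7f);
}
} // namespace native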

backends/cadence/reference/kernels/kernels.cpp

Lines changed: 14 additions & 5 deletions

@@ -7,21 +7,30 @@
  */

 #include <executorch/backends/cadence/reference/kernels/kernels.h>
-#include <math.h>
 #include <algorithm>
+#include <cmath>
 #include <cstring>
 #include <limits>
+
 namespace impl {
 namespace reference {
 namespace kernels {

 // Quantize a fp32 value to an int8_t/uint8_t value
 template <typename T>
 T quantize(const float x, float scale, int32_t zero_point) {
-  constexpr float min_val = std::numeric_limits<T>::min();
-  constexpr float max_val = std::numeric_limits<T>::max();
-  float tmp = roundf(x * scale + zero_point);
-  return std::max(std::min(tmp, max_val), min_val);
+  // constexpr float min_val = std::numeric_limits<T>::min();
+  // constexpr float max_val = std::numeric_limits<T>::max();
+  // float tmp = roundf(x * scale + zero_point);
+  // return std::max(std::min(tmp, max_val), min_val);
+  // Match Executorch CPU kernel implementation at
+  // https://fburl.com/code/fxizw6u6
+  int64_t qvalue;
+  qvalue = static_cast<int64_t>(zero_point + std::nearbyint(scale * x));
+
+  qvalue = std::max<int64_t>(qvalue, std::numeric_limits<T>::min());
+  qvalue = std::min<int64_t>(qvalue, std::numeric_limits<T>::max());
+  return static_cast<T>(qvalue);
 }

 // Quantize an fp32 array to an int8_t/uint8_t array
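The rewritten reference quantize differs from the old one in two observable ways: std::nearbyint rounds exact halves to even under the default FE_TONEAREST rounding mode (roundf always rounds halves away from zero), and clamping now happens in the int64_t domain rather than in float. A minimal standalone sketch, not from the commit, showing the rounding difference on an exact half:

#include <cmath>
#include <cstdio>

int main() {
  // A value where scale * x + zero_point lands exactly on .5.
  const float v = 2.5f;
  std::printf("roundf:    %g\n", std::roundf(v));    // 3 (half away from zero)
  std::printf("nearbyint: %g\n", std::nearbyint(v)); // 2 (half to even)
  return 0;
}

For int8_t/uint8_t outputs the two clamping schemes cover the same range; the half-rounding behavior is what brings the reference kernel in line with the ExecuTorch CPU implementation cited in the new comment.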

backends/cadence/reference/kernels/targets.bzl

Lines changed: 1 addition & 0 deletions

@@ -10,6 +10,7 @@ def define_common_targets():
         ],
         visibility = [
             "//executorch/backends/cadence/...",
+            "@EXECUTORCH_CLIENTS",
         ],
         platforms = CXX,
     )
