Commit 95a3ce0

Merge pull request #25 from dijopaul/main
Adding operators: bitwiseand, bitwiseor, bitwisexor, embedding, eq, fmod, ge, gt, hardtanh, le, lt, maskedfill, ne, select_copy and quantisedmatmul
2 parents: be36cc5 + 6373f69 · commit 95a3ce0

23 files changed: +5336 −8 lines

backends/cadence/aot/functions_hifi.yaml

Lines changed: 104 additions & 4 deletions
```diff
@@ -32,6 +32,36 @@
     - arg_meta: null
       kernel_name: cadence::impl::HiFi::add_out
 
+- op: bitwise_and.Scalar_out
+  kernels:
+    - arg_meta: null
+      kernel_name: cadence::impl::HiFi::bitwise_and_Scalar_out
+
+- op: bitwise_and.Tensor_out
+  kernels:
+    - arg_meta: null
+      kernel_name: cadence::impl::HiFi::bitwise_and_Tensor_out
+
+- op: bitwise_or.Scalar_out
+  kernels:
+    - arg_meta: null
+      kernel_name: cadence::impl::HiFi::bitwise_or_Scalar_out
+
+- op: bitwise_or.Tensor_out
+  kernels:
+    - arg_meta: null
+      kernel_name: cadence::impl::HiFi::bitwise_or_Tensor_out
+
+- op: bitwise_xor.Scalar_out
+  kernels:
+    - arg_meta: null
+      kernel_name: cadence::impl::HiFi::bitwise_xor_Scalar_out
+
+- op: bitwise_xor.Tensor_out
+  kernels:
+    - arg_meta: null
+      kernel_name: cadence::impl::HiFi::bitwise_xor_Tensor_out
+
 - op: bmm.out
   kernels:
     - arg_meta: null
@@ -65,27 +95,82 @@
 - op: embedding.out
   kernels:
     - arg_meta: null
-      kernel_name: torch::executor::embedding_out
+      kernel_name: cadence::impl::HiFi::embedding_out
+
+- op: eq.Tensor_out
+  kernels:
+    - arg_meta: null
+      kernel_name: cadence::impl::HiFi::eq_tensor_out
+
+- op: fmod.Tensor_out
+  kernels:
+    - arg_meta: null
+      kernel_name: cadence::impl::HiFi::fmod_Tensor_out
+
+- op: fmod.Scalar_out
+  kernels:
+    - arg_meta: null
+      kernel_name: cadence::impl::HiFi::fmod_Scalar_out
 
 - op: full.out
   kernels:
     - arg_meta: null
       kernel_name: cadence::impl::HiFi::full_out
 
-- op: gt.Scalar_out
+- op: ge.Scalar_out
+  kernels:
+    - arg_meta: null
+      kernel_name: cadence::impl::HiFi::ge_scalar_out
+
+- op: ge.Tensor_out
   kernels:
     - arg_meta: null
-      kernel_name: torch::executor::gt_scalar_out
+      kernel_name: cadence::impl::HiFi::ge_tensor_out
 
 - op: gelu.out
   kernels:
     - arg_meta: null
       kernel_name: torch::executor::gelu_out
 
+- op: gt.Scalar_out
+  kernels:
+    - arg_meta: null
+      kernel_name: cadence::impl::HiFi::gt_scalar_out
+
+- op: gt.Tensor_out
+  kernels:
+    - arg_meta: null
+      kernel_name: cadence::impl::HiFi::gt_tensor_out
+
 - op: hardtanh.out
   kernels:
     - arg_meta: null
-      kernel_name: torch::executor::hardtanh_out
+      kernel_name: cadence::impl::HiFi::hardtanh_out
+
+- op: le.Scalar_out
+  kernels:
+    - arg_meta: null
+      kernel_name: cadence::impl::HiFi::le_scalar_out
+
+- op: le.Tensor_out
+  kernels:
+    - arg_meta: null
+      kernel_name: cadence::impl::HiFi::le_tensor_out
+
+- op: lt.Scalar_out
+  kernels:
+    - arg_meta: null
+      kernel_name: cadence::impl::HiFi::lt_scalar_out
+
+- op: lt.Tensor_out
+  kernels:
+    - arg_meta: null
+      kernel_name: cadence::impl::HiFi::lt_tensor_out
+
+- op: masked_fill.Scalar_out
+  kernels:
+    - arg_meta: null
+      kernel_name: cadence::impl::HiFi::masked_fill_scalar_out
 
 - op: max_pool2d_with_indices.out
   kernels:
@@ -117,6 +202,11 @@
     - arg_meta: null
       kernel_name: cadence::impl::HiFi::mul_out
 
+- op: ne.Tensor_out
+  kernels:
+    - arg_meta: null
+      kernel_name: cadence::impl::HiFi::ne_tensor_out
+
 - op: permute_copy.out
   kernels:
     - arg_meta: null
@@ -147,6 +237,11 @@
     - arg_meta: null
       kernel_name: cadence::impl::HiFi::rsqrt_out
 
+- op: select_copy.int_out
+  kernels:
+    - arg_meta: null
+      kernel_name: cadence::impl::HiFi::select_copy_int_out
+
 - op: sigmoid.out
   kernels:
     - arg_meta: null
@@ -239,6 +334,11 @@
     - arg_meta: null
       kernel_name: cadence::impl::HiFi::quantized_fully_connected_out
 
+- func: cadence::quantized_matmul.out(Tensor X, int X_zero_point, Tensor Y, int Y_zero_point, Tensor? bias, int out_multiplier, int out_shift, int out_zero_point, bool transposed, *, Tensor(a!) out) -> Tensor(a!)
+  kernels:
+    - arg_meta: null
+      kernel_name: cadence::impl::HiFi::quantized_matmul_out
+
 - func: cadence::quantized_fully_connected.per_tensor_out(Tensor src, Tensor weight, Tensor bias, int src_zero_point, int weight_zero_point, int out_multiplier, int out_shift, int out_zero_point, Tensor? offset, *, Tensor(a!) out) -> Tensor(a!)
   kernels:
     - arg_meta: null
```
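The last hunk above registers the custom `cadence::quantized_matmul.out` schema. For intuition about its scalar arguments, here is a minimal scalar reference of the per-tensor requantization arithmetic those parameters conventionally drive. It is an illustration under assumed conventions (int8 data, a Q31-style `out_multiplier`, arithmetic right shift), not the vectorized HiFi kernel added by this PR.

```cpp
#include <cstdint>
#include <vector>

// Scalar reference for quantized matmul; illustration only. X is (M, K) and
// Y is (K, N), both row-major int8 with per-tensor quantization. The
// rounding and shift conventions below are assumptions.
std::vector<int8_t> quantized_matmul_ref(
    const std::vector<int8_t>& X, int32_t X_zero_point,
    const std::vector<int8_t>& Y, int32_t Y_zero_point,
    int M, int K, int N,
    int32_t out_multiplier, int32_t out_shift, int32_t out_zero_point) {
  std::vector<int8_t> out(static_cast<size_t>(M) * N);
  for (int m = 0; m < M; ++m) {
    for (int n = 0; n < N; ++n) {
      int32_t acc = 0;
      for (int k = 0; k < K; ++k) {
        // Accumulate in int32 with both input zero points removed.
        acc += (X[m * K + k] - X_zero_point) * (Y[k * N + n] - Y_zero_point);
      }
      // Requantize: fixed-point multiply, scale back down, re-center on the
      // output zero point, and saturate to int8.
      int64_t scaled = static_cast<int64_t>(acc) * out_multiplier;
      int32_t q =
          static_cast<int32_t>(scaled >> (31 + out_shift)) + out_zero_point;
      if (q < -128) q = -128;
      if (q > 127) q = 127;
      out[static_cast<size_t>(m) * N + n] = static_cast<int8_t>(q);
    }
  }
  return out;
}
```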

backends/cadence/hifi/kernels/CMakeLists.txt

Lines changed: 3 additions & 0 deletions
```diff
@@ -16,6 +16,9 @@ add_library(
   ${EXECUTORCH_ROOT}/backends/cadence/hifi/third-party/nnlib/xa_nn_elm_clamp_f32_broadcast.c
   ${EXECUTORCH_ROOT}/backends/cadence/hifi/third-party/nnlib/xa_nn_elm_div_f32_broadcast.c
   ${EXECUTORCH_ROOT}/backends/cadence/hifi/third-party/nnlib/xa_nn_elm_div_mode_f32_broadcast.c
+  ${EXECUTORCH_ROOT}/backends/cadence/hifi/third-party/nnlib/xa_nn_elm_fmod_broadcast_f32.c
+  ${EXECUTORCH_ROOT}/backends/cadence/hifi/third-party/nnlib/xa_nn_greater_lesser_equal_f32.c
+  ${EXECUTORCH_ROOT}/backends/cadence/hifi/third-party/nnlib/xa_nn_elm_logicalxor_bool_bool.c
   ${EXECUTORCH_ROOT}/backends/cadence/hifi/third-party/nnlib/xa_nn_elm_minimum_maximum_f32.c
   ${EXECUTORCH_ROOT}/backends/cadence/hifi/third-party/nnlib/xa_nn_elm_mul_f32_broadcast.c
   ${EXECUTORCH_ROOT}/backends/cadence/hifi/third-party/nnlib/xa_nn_elm_pow_f32.c
```

backends/cadence/hifi/kernels/kernels.h

Lines changed: 36 additions & 0 deletions
```diff
@@ -88,6 +88,42 @@ extern "C" WORD32 xa_nn_elm_div_mode_broadcast_4D_f32xf32_f32(
     const WORD32* const p_inp2_shape,
     WORD32 mode);
 
+extern "C" WORD32 xa_nn_elm_greater_lesser_equal_f32xf32_f32(
+    WORD8* __restrict__ p_out,
+    const FLOAT32* __restrict__ p_inp1,
+    const FLOAT32* __restrict__ p_inp2,
+    WORD32 num_elm,
+    WORD32 kernel_type);
+
+extern "C" WORD32 xa_nn_elm_greater_lesser_equal_broadcast_4D_f32xf32_f32(
+    WORD8* __restrict__ p_out,
+    const WORD32* const p_out_shape,
+    const FLOAT32* __restrict__ p_inp1,
+    const WORD32* const p_inp1_shape,
+    const FLOAT32* __restrict__ p_inp2,
+    const WORD32* const p_inp2_shape,
+    WORD32 kernel_type);
+
+extern "C" WORD32 xa_nn_elm_fmod_f32xf32_f32(
+    FLOAT32* __restrict__ p_out,
+    const FLOAT32* __restrict__ p_inp1,
+    const FLOAT32* __restrict__ p_inp2,
+    WORD32 num_elm);
+
+extern "C" WORD32 xa_nn_elm_fmod_broadcast_4D_f32xf32_f32(
+    FLOAT32* __restrict__ p_out,
+    const WORD32* const p_out_shape,
+    const FLOAT32* __restrict__ p_inp1,
+    const WORD32* const p_inp1_shape,
+    const FLOAT32* __restrict__ p_inp2,
+    const WORD32* const p_inp2_shape);
+
+extern "C" WORD32 xa_nn_elm_logicalxor_boolxbool_bool(
+    WORD8* __restrict__ p_out,
+    const WORD8* __restrict__ p_inp1,
+    const WORD8* __restrict__ p_inp2,
+    WORD32 num_elm);
+
 extern "C" WORD32 xa_nn_elm_maximum_f32xf32_f32(
     FLOAT32* __restrict__ p_out,
     const FLOAT32* __restrict__ p_inp1,
```
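A single entry point serves the whole greater/less/equal family through the `kernel_type` selector, with a matching 4-D broadcast variant. Below is a minimal host-side sketch of calling the flat variant. The typedefs are normally supplied by the nnlib headers and are inlined here only to keep the sketch self-contained, and the concrete selector value for "greater or equal" is a placeholder assumption: the real encoding lives in the nnlib sources, not this header.

```cpp
#include <cstdint>
#include <vector>

// Stand-ins for the nnlib typedefs (normally provided by its headers).
using WORD8 = int8_t;
using WORD32 = int32_t;
using FLOAT32 = float;

// Declaration copied from the kernels.h hunk above.
extern "C" WORD32 xa_nn_elm_greater_lesser_equal_f32xf32_f32(
    WORD8* __restrict__ p_out,
    const FLOAT32* __restrict__ p_inp1,
    const FLOAT32* __restrict__ p_inp2,
    WORD32 num_elm,
    WORD32 kernel_type);

// Hypothetical selector value; the actual encoding is defined in nnlib.
constexpr WORD32 kGreaterEqual = 0;

std::vector<WORD8> ge_f32(const std::vector<FLOAT32>& a,
                          const std::vector<FLOAT32>& b) {
  std::vector<WORD8> out(a.size());
  // The WORD32 return value is a status code; a real operator would
  // surface a nonzero status as an error.
  WORD32 status = xa_nn_elm_greater_lesser_equal_f32xf32_f32(
      out.data(), a.data(), b.data(),
      static_cast<WORD32>(a.size()), kGreaterEqual);
  (void)status;
  return out;
}
```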

backends/cadence/hifi/operators/CMakeLists.txt

Lines changed: 17 additions & 4 deletions
```diff
@@ -22,20 +22,34 @@ endif()
 set(_aten_ops__srcs
   "${EXECUTORCH_ROOT}/backends/cadence/hifi/operators/op_add.cpp"
   "${EXECUTORCH_ROOT}/backends/cadence/hifi/operators/op_atan2.cpp"
+  "${EXECUTORCH_ROOT}/backends/cadence/hifi/operators/op_bitwise_and.cpp"
+  "${EXECUTORCH_ROOT}/backends/cadence/hifi/operators/op_bitwise_or.cpp"
+  "${EXECUTORCH_ROOT}/backends/cadence/hifi/operators/op_bitwise_xor.cpp"
   "${EXECUTORCH_ROOT}/backends/cadence/hifi/operators/op_bmm.cpp"
   "${EXECUTORCH_ROOT}/backends/cadence/hifi/operators/op_cat.cpp"
   "${EXECUTORCH_ROOT}/backends/cadence/hifi/operators/op_clamp.cpp"
   "${EXECUTORCH_ROOT}/backends/cadence/hifi/operators/op_div.cpp"
+  "${EXECUTORCH_ROOT}/backends/cadence/hifi/operators/op_embedding.cpp"
+  "${EXECUTORCH_ROOT}/backends/cadence/hifi/operators/op_eq.cpp"
+  "${EXECUTORCH_ROOT}/backends/cadence/hifi/operators/op_fmod.cpp"
   "${EXECUTORCH_ROOT}/backends/cadence/hifi/operators/op_full.cpp"
+  "${EXECUTORCH_ROOT}/backends/cadence/hifi/operators/op_ge.cpp"
+  "${EXECUTORCH_ROOT}/backends/cadence/hifi/operators/op_gt.cpp"
+  "${EXECUTORCH_ROOT}/backends/cadence/hifi/operators/op_hardtanh.cpp"
+  "${EXECUTORCH_ROOT}/backends/cadence/hifi/operators/op_le.cpp"
+  "${EXECUTORCH_ROOT}/backends/cadence/hifi/operators/op_lt.cpp"
+  "${EXECUTORCH_ROOT}/backends/cadence/hifi/operators/op_masked_fill.cpp"
   "${EXECUTORCH_ROOT}/backends/cadence/hifi/operators/op_maximum.cpp"
   "${EXECUTORCH_ROOT}/backends/cadence/hifi/operators/op_mean.cpp"
   "${EXECUTORCH_ROOT}/backends/cadence/hifi/operators/op_minimum.cpp"
   "${EXECUTORCH_ROOT}/backends/cadence/hifi/operators/op_mm.cpp"
   "${EXECUTORCH_ROOT}/backends/cadence/hifi/operators/op_mul.cpp"
+  "${EXECUTORCH_ROOT}/backends/cadence/hifi/operators/op_ne.cpp"
   "${EXECUTORCH_ROOT}/backends/cadence/hifi/operators/op_permute_copy.cpp"
   "${EXECUTORCH_ROOT}/backends/cadence/hifi/operators/op_pow.cpp"
   "${EXECUTORCH_ROOT}/backends/cadence/hifi/operators/op_remainder.cpp"
   "${EXECUTORCH_ROOT}/backends/cadence/hifi/operators/op_rsqrt.cpp"
+  "${EXECUTORCH_ROOT}/backends/cadence/hifi/operators/op_select_copy.cpp"
   "${EXECUTORCH_ROOT}/backends/cadence/hifi/operators/op_slice_copy.cpp"
   "${EXECUTORCH_ROOT}/backends/cadence/hifi/operators/op_softmax.cpp"
   "${EXECUTORCH_ROOT}/backends/cadence/hifi/operators/op_split_with_sizes_copy.cpp"
@@ -45,22 +59,21 @@ set(_aten_ops__srcs
   "${EXECUTORCH_ROOT}/backends/cadence/hifi/operators/op_view_copy.cpp"
   "${EXECUTORCH_ROOT}/backends/cadence/hifi/operators/op_where.cpp"
   "${EXECUTORCH_ROOT}/kernels/portable/cpu/op_clone.cpp"
-  "${EXECUTORCH_ROOT}/kernels/portable/cpu/op_embedding.cpp"
-  "${EXECUTORCH_ROOT}/kernels/portable/cpu/op_gt.cpp"
   "${EXECUTORCH_ROOT}/kernels/portable/cpu/op_gelu.cpp"
-  "${EXECUTORCH_ROOT}/kernels/portable/cpu/op_hardtanh.cpp"
   "${EXECUTORCH_ROOT}/kernels/portable/cpu/op_max_pool2d_with_indices.cpp"
   "${EXECUTORCH_ROOT}/kernels/portable/cpu/op_to_copy.cpp"
   "${EXECUTORCH_ROOT}/kernels/portable/cpu/pattern/unary_ufunc_realhbbf16_to_floathbf16.cpp"
   "${EXECUTORCH_ROOT}/kernels/portable/cpu/util/activation_ops_util.cpp"
   "${EXECUTORCH_ROOT}/kernels/portable/cpu/util/broadcast_util.cpp"
   "${EXECUTORCH_ROOT}/kernels/portable/cpu/util/copy_ops_util.cpp"
   "${EXECUTORCH_ROOT}/kernels/portable/cpu/util/dtype_util.cpp"
+  "${EXECUTORCH_ROOT}/kernels/portable/cpu/util/delinearize_index.cpp"
   "${EXECUTORCH_ROOT}/kernels/portable/cpu/util/index_util.cpp"
   "${EXECUTORCH_ROOT}/kernels/portable/cpu/util/kernel_ops_util.cpp"
   "${EXECUTORCH_ROOT}/kernels/portable/cpu/util/matmul_ops_util.cpp"
   "${EXECUTORCH_ROOT}/kernels/portable/cpu/util/reduce_util.cpp"
   "${EXECUTORCH_ROOT}/kernels/portable/cpu/util/repeat_util.cpp"
+  "${EXECUTORCH_ROOT}/kernels/portable/cpu/util/select_copy_util.cpp"
   "${EXECUTORCH_ROOT}/kernels/portable/cpu/util/slice_util.cpp"
 )
 add_library(aten_ops_cadence ${_aten_ops__srcs})
@@ -78,7 +91,7 @@ target_include_directories(
 
 # Custom ops that are needed to run the test model.
 add_library(
-  custom_ops "op_quantized_linear_out.cpp" "op_quantized_layer_norm.cpp"
+  custom_ops "op_quantized_linear_out.cpp" "op_quantized_layer_norm.cpp" "quantized_matmul_out.cpp"
   "op_quantize_per_tensor.cpp" "op_quantized_relu_out.cpp" "op_dequantize_per_tensor.cpp"
   "op_quantized_conv_out.cpp" "op_quantized_fully_connected_out"
 )
```
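Each new `op_*.cpp` source pairs one of the YAML registrations above with the nnlib kernels declared in `kernels.h`. As a rough illustration of that pattern, the sketch below dispatches a comparison between the flat kernel (same-shape inputs) and the 4-D broadcast variant; it is not the actual `op_ge.cpp` from this PR, and the `kernel_type` encoding is again an assumption.

```cpp
#include <cstdint>

// Stand-ins for the nnlib typedefs (normally provided by its headers).
using WORD8 = int8_t;
using WORD32 = int32_t;
using FLOAT32 = float;

// Declarations copied from the kernels.h hunk above.
extern "C" WORD32 xa_nn_elm_greater_lesser_equal_f32xf32_f32(
    WORD8* __restrict__ p_out, const FLOAT32* __restrict__ p_inp1,
    const FLOAT32* __restrict__ p_inp2, WORD32 num_elm, WORD32 kernel_type);
extern "C" WORD32 xa_nn_elm_greater_lesser_equal_broadcast_4D_f32xf32_f32(
    WORD8* __restrict__ p_out, const WORD32* const p_out_shape,
    const FLOAT32* __restrict__ p_inp1, const WORD32* const p_inp1_shape,
    const FLOAT32* __restrict__ p_inp2, const WORD32* const p_inp2_shape,
    WORD32 kernel_type);

// Plausible dispatch shape for the comparison family: use the flat kernel
// when both inputs already share a shape, otherwise fall back to the 4-D
// broadcasting variant. Shapes are WORD32[4], as the broadcast API implies.
WORD32 compare_f32(WORD8* out, const WORD32* out_shape,
                   const FLOAT32* a, const WORD32* a_shape,
                   const FLOAT32* b, const WORD32* b_shape,
                   WORD32 num_elm, bool same_shape, WORD32 kernel_type) {
  if (same_shape) {
    return xa_nn_elm_greater_lesser_equal_f32xf32_f32(
        out, a, b, num_elm, kernel_type);
  }
  return xa_nn_elm_greater_lesser_equal_broadcast_4D_f32xf32_f32(
      out, out_shape, a, a_shape, b, b_shape, kernel_type);
}
```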
