77 */
88
99#pragma once
10-
10+ # include < executorch/runtime/kernel/kernel_includes.h >
1111#include < inttypes.h>
1212#include < stddef.h>
1313#include < xa_type_def.h>
/* For NNLIB APIs */
1515#include " xa_nnlib_kernels_api.h"
1616
17+ using executorch::runtime::KernelRuntimeContext;
18+ using executorch::runtime::Result;
19+
/* Potential NNLIB functions/APIs */
1821
1922extern " C" WORD32 xa_nn_broadcast_32_32 (
@@ -23,6 +26,16 @@ extern "C" WORD32 xa_nn_broadcast_32_32(
2326 const int * const in_shape,
2427 int num_dims);
2528
29+ extern " C" WORD32 xa_nn_concat_32_32 (
30+ WORD32* __restrict__ p_out,
31+ const WORD32* const p_out_shape,
32+ const WORD32** pp_inps,
33+ const WORD32* const * pp_inps_shape,
34+ WORD32 num_out_dims,
35+ WORD32 num_inp,
36+ WORD32 num_inp_dims,
37+ WORD32 axis);
38+
2639extern " C" WORD32 xa_nn_elm_add_broadcast_4D_f32xf32_f32 (
2740 FLOAT32* __restrict__ p_out,
2841 const WORD32* const p_out_shape,
@@ -31,6 +44,26 @@ extern "C" WORD32 xa_nn_elm_add_broadcast_4D_f32xf32_f32(
3144 const FLOAT32* __restrict__ p_inp2,
3245 const WORD32* const p_inp2_shape);
3346
47+ extern " C" void
48+ xa_nn_elm_atan2_f32 (FLOAT32* z, const FLOAT32* y, const FLOAT32* x, WORD32 N);
49+
50+ extern " C" WORD32 xa_nn_elm_clamp_f32xf32xf32_f32 (
51+ FLOAT32* __restrict__ p_out,
52+ const FLOAT32* __restrict__ p_inp,
53+ const FLOAT32* __restrict__ p_min,
54+ const FLOAT32* __restrict__ p_max,
55+ WORD32 num_elm);
56+
57+ extern " C" WORD32 xa_nn_elm_clamp_broadcast_4D_f32Xf32xf32_f32 (
58+ FLOAT32* __restrict__ p_out,
59+ const WORD32* const p_out_shape,
60+ const FLOAT32* __restrict__ p_inp,
61+ const WORD32* const p_inp_shape,
62+ const FLOAT32* __restrict__ p_min,
63+ const WORD32* const p_min_shape,
64+ const FLOAT32* __restrict__ p_max,
65+ const WORD32* const p_max_shape);
66+
3467extern " C" WORD32 xa_nn_elm_div_broadcast_4D_f32xf32_f32 (
3568 FLOAT32* __restrict__ p_out,
3669 const WORD32* const p_out_shape,
@@ -97,6 +130,20 @@ extern "C" void xa_nn_elm_pow_f32(
97130 const FLOAT32* __restrict__ y,
98131 WORD32 N);
99132
133+ extern " C" WORD32 xa_nn_elm_remainder_f32xf32_f32 (
134+ FLOAT32* __restrict__ p_out,
135+ const FLOAT32* __restrict__ p_inp1,
136+ const FLOAT32* __restrict__ p_inp2,
137+ WORD32 num_elm);
138+
139+ extern " C" WORD32 xa_nn_elm_remainder_broadcast_4D_f32xf32_f32 (
140+ FLOAT32* __restrict__ p_out,
141+ const WORD32* const p_out_shape,
142+ const FLOAT32* __restrict__ p_inp1,
143+ const WORD32* const p_inp1_shape,
144+ const FLOAT32* __restrict__ p_inp2,
145+ const WORD32* const p_inp2_shape);
146+
100147extern " C" WORD32 xa_nn_elm_where_f32xf32_f32 (
101148 FLOAT32* __restrict__ p_out,
102149 const FLOAT32* __restrict__ p_inp1,
@@ -125,11 +172,22 @@ extern "C" WORD32 xa_nn_reduce_mean_4D_f32_f32(
125172 WORD32 num_axis_dims,
126173 void * __restrict__ p_scratch_in);
127174
175+ extern " C" WORD32 xa_nn_transpose_32_32 (
176+ WORD32* __restrict__ p_out,
177+ const WORD32* const p_out_shape,
178+ const WORD32* __restrict__ p_inp,
179+ const WORD32* const p_inp_shape,
180+ const WORD32* __restrict__ p_permute_vec,
181+ WORD32 num_out_dims,
182+ WORD32 num_inp_dims);
183+
128184namespace cadence {
129185namespace impl {
130186namespace HiFi {
131187namespace kernels {
132188
189+ void * allocate_temp_memory (KernelRuntimeContext& ctx, size_t size);
190+
/*
 * Kernel-namespace memcpy declaration; the definition is not part of this
 * header. Unlike the C library memcpy, this overload returns void.
 *
 *   dst        destination buffer
 *   src        source buffer (read-only)
 *   num_bytes  NOTE(review): presumably the number of bytes to copy - confirm
 *              against the definition, including whether overlapping ranges
 *              are supported
 */
void memcpy(void* dst, const void* src, size_t num_bytes);
134192
135193WORD32 matmul_asym8uxasym8u_asym8u (
0 commit comments