Skip to content

Commit 4e86f50

Browse files
FargkJaccovG
authored and committed
Safe ptr cast eltwise kernels
* Safe pointer cast added to eltwise_sub * as extra, dependency files added in .gitignore * unsafe pointer casts fixed in other eltwise kernels
1 parent 18b1427 commit 4e86f50

File tree

2 files changed

+27
-22
lines changed

2 files changed

+27
-22
lines changed

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,3 +16,4 @@ example_har_smartphone/MIDE_EM/.settings/*
1616
*.db
1717
.project
1818
.vscode
19+
obj/*.d

lib/src/kernels/eltwise/mli_krn_eltwise.h

Lines changed: 26 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -61,11 +61,13 @@ static inline void __attribute__ ((always_inline)) eltwise_op_sub_fx (
6161
// Simple broadcast (vector on scalar)
6262
//==============================================
6363
if (op2_size == 1) {
64+
const io_T broadcast_val = *(const io_T *)op2;
6465
// Vector minus scalar
65-
for (int idx = 0; idx < op1_size; idx++) out[idx] = mli_math_sub_fx(op1[idx], *op2);
66+
for (int idx = 0; idx < op1_size; idx++) out[idx] = mli_math_sub_fx(op1[idx], broadcast_val);
6667
} else if (op1_size == 1) {
68+
const io_T broadcast_val = *(const io_T *)op1;
6769
// Scalar minus Vector
68-
for (int idx = 0; idx < op2_size; idx++) out[idx] = mli_math_sub_fx(*op1, op2[idx]);
70+
for (int idx = 0; idx < op2_size; idx++) out[idx] = mli_math_sub_fx(broadcast_val, op2[idx]);
6971
} else {
7072
// Elementwise between tensors of the same shape
7173
//==============================================
@@ -87,7 +89,7 @@ static inline void __attribute__ ((always_inline)) eltwise_op_add_fx (
8789
// Simple broadcast (vector on scalar)
8890
//==============================================
8991
if (op1_size == 1 || op2_size == 1) {
90-
const int8_t broadcast_val = (op1_size > op2_size) ? (*op2) : (*op1);
92+
const int8_t broadcast_val = (op1_size > op2_size) ? (*(const io_T *)op2) : (*(const io_T *)op1);
9193
const MLI_PTR(int8_t) vec = (op1_size > op2_size) ? (MLI_PTR(int8_t))op1 : (MLI_PTR(int8_t))op2;
9294
const int out_size = MAX(op1_size, op2_size);
9395

@@ -116,7 +118,7 @@ static inline void __attribute__ ((always_inline)) eltwise_op_max_fx (
116118
// Simple broadcast (vector on scalar)
117119
//==============================================
118120
if (op1_size == 1 || op2_size == 1) {
119-
const int8_t broadcast_val = (op1_size > op2_size) ? (*op2) : (*op1);
121+
const int8_t broadcast_val = (op1_size > op2_size) ? (*(const io_T *)op2) : (*(const io_T *)op1);
120122
const MLI_PTR(int8_t) vec = (op1_size > op2_size) ? (MLI_PTR(int8_t))op1 : (MLI_PTR(int8_t))op2;
121123
const int out_size = MAX(op1_size, op2_size);
122124

@@ -145,7 +147,7 @@ static inline void __attribute__ ((always_inline)) eltwise_op_min_fx (
145147
// Simple broadcast (vector on scalar)
146148
//==============================================
147149
if (op1_size == 1 || op2_size == 1) {
148-
const int8_t broadcast_val = (op1_size > op2_size) ? (*op2) : (*op1);
150+
const int8_t broadcast_val = (op1_size > op2_size) ? (*(const io_T *)op2) : (*(const io_T *)op1);
149151
const MLI_PTR(int8_t) vec = (op1_size > op2_size) ? (MLI_PTR(int8_t))op1 : (MLI_PTR(int8_t))op2;
150152
const int out_size = MAX(op1_size, op2_size);
151153

@@ -175,13 +177,13 @@ static inline void __attribute__ ((always_inline)) eltwise_op_add_fx (
175177
// Simple broadcast (vector on scalar)
176178
//==============================================
177179
if (op1_size == 1 || op2_size == 1) {
178-
const io_T broadcast_val = (op1_size > op2_size) ? (*op2) : (*op1);
180+
const io_T broadcast_val = (op1_size > op2_size) ? (*(const io_T *)op2) : (*(const io_T *)op1);
179181
const MLI_PTR(io_T) vec = (op1_size > op2_size) ? (MLI_PTR(io_T))op1 : (MLI_PTR(io_T))op2;
180182
const int out_size = MAX(op1_size, op2_size);
181183

182-
io_T broadcast_val_v2[] = {broadcast_val, broadcast_val};
184+
const v2q15_t broadcast_val_v2 = fx_create_v2q15(broadcast_val, broadcast_val);
183185
for (int idx = 0; idx < out_size / 2; idx++) {
184-
mli_prv_store_2_samples(out, mli_prv_load_add_vec2(vec, (const MLI_PTR(io_T)) & broadcast_val_v2));
186+
mli_prv_store_2_samples(out, fx_add_v2q15(broadcast_val_v2, mli_prv_load_2_samples(vec)));
185187
vec += 2;
186188
out += 2;
187189
}
@@ -216,26 +218,28 @@ static inline void __attribute__ ((always_inline)) eltwise_op_sub_fx (
216218
// Simple broadcast (vector on scalar)
217219
//==============================================
218220
if (op2_size == 1) {
219-
io_T broadcast_val_v2[] = {*op2, *op2};
221+
const io_T broadcast_val = *(const io_T *)op2;
222+
const v2q15_t broadcast_val_v2 = fx_create_v2q15(broadcast_val, broadcast_val);
220223
// Vector minus scalar
221224
for (int idx = 0; idx < op1_size / 2; idx++) {
222-
mli_prv_store_2_samples(out, mli_prv_load_sub_vec2(op1, (const MLI_PTR(io_T)) & broadcast_val_v2));
225+
mli_prv_store_2_samples(out, fx_sub_v2q15(mli_prv_load_2_samples(op1), broadcast_val_v2));
223226
op1 += 2;
224227
out += 2;
225228
}
226229
if (op1_size & 1) {
227-
*out++ = mli_math_sub_fx(*op1++, *op2);
230+
*out++ = mli_math_sub_fx(*op1++, broadcast_val);
228231
}
229232
} else if (op1_size == 1) {
230-
io_T broadcast_val_v2[] = {*op1, *op1};
233+
const io_T broadcast_val = *(const io_T *)op1;
234+
const v2q15_t broadcast_val_v2 = fx_create_v2q15(broadcast_val, broadcast_val);
231235
// Scalar minus Vector
232236
for (int idx = 0; idx < op2_size / 2; idx++) {
233-
mli_prv_store_2_samples(out, mli_prv_load_sub_vec2((const MLI_PTR(io_T)) & broadcast_val_v2, op2));
237+
mli_prv_store_2_samples(out, fx_sub_v2q15(broadcast_val_v2, mli_prv_load_2_samples(op2)));
234238
op2 += 2;
235239
out += 2;
236240
}
237241
if (op2_size & 1) {
238-
*out++ = mli_math_sub_fx(*op1, *op2++);
242+
*out++ = mli_math_sub_fx(broadcast_val, *op2++);
239243
}
240244
} else {
241245
// Elementwise between tensors of the same shape
@@ -264,13 +268,13 @@ static inline void __attribute__ ((always_inline)) eltwise_op_max_fx (
264268
// Simple broadcast (vector on scalar)
265269
//==============================================
266270
if (op1_size == 1 || op2_size == 1) {
267-
const io_T broadcast_val = (op1_size > op2_size) ? (*op2) : (*op1);
271+
const io_T broadcast_val = (op1_size > op2_size) ? (*(const io_T *)op2) : (*(const io_T *)op1);
268272
const MLI_PTR(io_T) vec = (op1_size > op2_size) ? (MLI_PTR(io_T))op1 : (MLI_PTR(io_T))op2;
269273
const int out_size = MAX(op1_size, op2_size);
270274

271-
io_T broadcast_val_v2[] = {broadcast_val, broadcast_val};
275+
const v2q15_t broadcast_val_v2 = fx_create_v2q15(broadcast_val, broadcast_val);
272276
for (int idx = 0; idx < out_size / 2; idx++) {
273-
mli_prv_store_2_samples(out, mli_prv_load_max_vec2(vec, (const MLI_PTR(io_T)) & broadcast_val_v2));
277+
mli_prv_store_2_samples(out, fx_max_v2q15(broadcast_val_v2, mli_prv_load_2_samples(vec)));
274278
vec += 2;
275279
out += 2;
276280
}
@@ -304,13 +308,13 @@ static inline void __attribute__ ((always_inline)) eltwise_op_min_fx (
304308
// Simple broadcast (vector on scalar)
305309
//==============================================
306310
if (op1_size == 1 || op2_size == 1) {
307-
const io_T broadcast_val = (op1_size > op2_size) ? (*op2) : (*op1);
311+
const io_T broadcast_val = (op1_size > op2_size) ? (*(const io_T *)op2) : (*(const io_T *)op1);
308312
const MLI_PTR(io_T) vec = (op1_size > op2_size) ? (MLI_PTR(io_T))op1 : (MLI_PTR(io_T))op2;
309313
const int out_size = MAX(op1_size, op2_size);
310314

311-
io_T broadcast_val_v2[] = {broadcast_val, broadcast_val};
315+
const v2q15_t broadcast_val_v2 = fx_create_v2q15(broadcast_val, broadcast_val);
312316
for (int idx = 0; idx < out_size / 2; idx++) {
313-
mli_prv_store_2_samples(out, mli_prv_load_min_vec2(vec, (const MLI_PTR(io_T)) & broadcast_val_v2));
317+
mli_prv_store_2_samples(out, fx_min_v2q15(broadcast_val_v2, mli_prv_load_2_samples(vec)));
314318
vec += 2;
315319
out += 2;
316320
}
@@ -345,7 +349,7 @@ static inline void __attribute__ ((always_inline)) eltwise_op_mul_fx (
345349
// Simple broadcast (vector on scalar)
346350
//==============================================
347351
if (op1_size == 1 || op2_size == 1) {
348-
const io_T broadcast_val = (op1_size > op2_size) ? (*op2) : (*op1);
352+
const io_T broadcast_val = (op1_size > op2_size) ? (*(const io_T *)op2) : (*(const io_T *)op1);
349353
const MLI_PTR(io_T) vec = (op1_size > op2_size) ? op1 : op2;
350354
const int out_size = MAX(op1_size, op2_size);
351355
v2q15_t broadcast_val_v2 = {broadcast_val, broadcast_val};
@@ -419,7 +423,7 @@ static inline void __attribute__ ((always_inline)) eltwise_op_mul_with_restricts
419423
// Simple broadcast (vector on scalar)
420424
//==============================================
421425
if (op1_size == 1 || op2_size == 1) {
422-
const io_T broadcast_val = (op1_size > op2_size) ? (*op2) : (*op1);
426+
const io_T broadcast_val = (op1_size > op2_size) ? (*(const io_T *)op2) : (*(const io_T *)op1);
423427
const MLI_PTR(io_T) vec = (op1_size > op2_size) ? op1 : op2;
424428
const int out_size = MAX(op1_size, op2_size);
425429
v2q15_t broadcast_val_v2 = {broadcast_val, broadcast_val};

0 commit comments

Comments
 (0)