@@ -61,11 +61,13 @@ static inline void __attribute__ ((always_inline)) eltwise_op_sub_fx (
6161 // Simple broadcast (vector on scalar)
6262 // ==============================================
6363 if (op2_size == 1 ) {
64+ const io_T broadcast_val = *(const io_T *)op2;
6465 // Vector minus scalar
65- for (int idx = 0 ; idx < op1_size; idx++) out[idx] = mli_math_sub_fx (op1[idx], *op2 );
66+ for (int idx = 0 ; idx < op1_size; idx++) out[idx] = mli_math_sub_fx (op1[idx], broadcast_val );
6667 } else if (op1_size == 1 ) {
68+ const io_T broadcast_val = *(const io_T *)op1;
6769 // Scalar minus Vector
68- for (int idx = 0 ; idx < op2_size; idx++) out[idx] = mli_math_sub_fx (*op1 , op2[idx]);
70+ for (int idx = 0 ; idx < op2_size; idx++) out[idx] = mli_math_sub_fx (broadcast_val , op2[idx]);
6971 } else {
7072 // Elementwise between tensors of the same shape
7173 // ==============================================
@@ -87,7 +89,7 @@ static inline void __attribute__ ((always_inline)) eltwise_op_add_fx (
8789 // Simple broadcast (vector on scalar)
8890 // ==============================================
8991 if (op1_size == 1 || op2_size == 1 ) {
90- const int8_t broadcast_val = (op1_size > op2_size) ? (*op2) : (*op1);
92+ const int8_t broadcast_val = (op1_size > op2_size) ? (*( const io_T *) op2) : (*( const io_T *) op1);
9193 const MLI_PTR (int8_t ) vec = (op1_size > op2_size) ? (MLI_PTR (int8_t ))op1 : (MLI_PTR (int8_t ))op2;
9294 const int out_size = MAX (op1_size, op2_size);
9395
@@ -116,7 +118,7 @@ static inline void __attribute__ ((always_inline)) eltwise_op_max_fx (
116118 // Simple broadcast (vector on scalar)
117119 // ==============================================
118120 if (op1_size == 1 || op2_size == 1 ) {
119- const int8_t broadcast_val = (op1_size > op2_size) ? (*op2) : (*op1);
121+ const int8_t broadcast_val = (op1_size > op2_size) ? (*( const io_T *) op2) : (*( const io_T *) op1);
120122 const MLI_PTR (int8_t ) vec = (op1_size > op2_size) ? (MLI_PTR (int8_t ))op1 : (MLI_PTR (int8_t ))op2;
121123 const int out_size = MAX (op1_size, op2_size);
122124
@@ -145,7 +147,7 @@ static inline void __attribute__ ((always_inline)) eltwise_op_min_fx (
145147 // Simple broadcast (vector on scalar)
146148 // ==============================================
147149 if (op1_size == 1 || op2_size == 1 ) {
148- const int8_t broadcast_val = (op1_size > op2_size) ? (*op2) : (*op1);
150+ const int8_t broadcast_val = (op1_size > op2_size) ? (*( const io_T *) op2) : (*( const io_T *) op1);
149151 const MLI_PTR (int8_t ) vec = (op1_size > op2_size) ? (MLI_PTR (int8_t ))op1 : (MLI_PTR (int8_t ))op2;
150152 const int out_size = MAX (op1_size, op2_size);
151153
@@ -175,13 +177,13 @@ static inline void __attribute__ ((always_inline)) eltwise_op_add_fx (
175177 // Simple broadcast (vector on scalar)
176178 // ==============================================
177179 if (op1_size == 1 || op2_size == 1 ) {
178- const io_T broadcast_val = (op1_size > op2_size) ? (*op2) : (*op1);
180+ const io_T broadcast_val = (op1_size > op2_size) ? (*( const io_T *) op2) : (*( const io_T *) op1);
179181 const MLI_PTR (io_T) vec = (op1_size > op2_size) ? (MLI_PTR (io_T))op1 : (MLI_PTR (io_T))op2;
180182 const int out_size = MAX (op1_size, op2_size);
181183
182- io_T broadcast_val_v2[] = { broadcast_val, broadcast_val} ;
184+ const v2q15_t broadcast_val_v2 = fx_create_v2q15 ( broadcast_val, broadcast_val) ;
183185 for (int idx = 0 ; idx < out_size / 2 ; idx++) {
184- mli_prv_store_2_samples (out, mli_prv_load_add_vec2 (vec, ( const MLI_PTR (io_T)) & broadcast_val_v2 ));
186+ mli_prv_store_2_samples (out, fx_add_v2q15 (broadcast_val_v2, mli_prv_load_2_samples (vec) ));
185187 vec += 2 ;
186188 out += 2 ;
187189 }
@@ -216,26 +218,28 @@ static inline void __attribute__ ((always_inline)) eltwise_op_sub_fx (
216218 // Simple broadcast (vector on scalar)
217219 // ==============================================
218220 if (op2_size == 1 ) {
219- io_T broadcast_val_v2[] = {*op2, *op2};
221+ const io_T broadcast_val = *(const io_T *)op2;
222+ const v2q15_t broadcast_val_v2 = fx_create_v2q15 (broadcast_val, broadcast_val);
220223 // Vector minus scalar
221224 for (int idx = 0 ; idx < op1_size / 2 ; idx++) {
222- mli_prv_store_2_samples (out, mli_prv_load_sub_vec2 (op1, ( const MLI_PTR (io_T)) & broadcast_val_v2));
225+ mli_prv_store_2_samples (out, fx_sub_v2q15 ( mli_prv_load_2_samples (op1), broadcast_val_v2));
223226 op1 += 2 ;
224227 out += 2 ;
225228 }
226229 if (op1_size & 1 ) {
227- *out++ = mli_math_sub_fx (*op1++, *op2 );
230+ *out++ = mli_math_sub_fx (*op1++, broadcast_val );
228231 }
229232 } else if (op1_size == 1 ) {
230- io_T broadcast_val_v2[] = {*op1, *op1};
233+ const io_T broadcast_val = *(const io_T *)op1;
234+ const v2q15_t broadcast_val_v2 = fx_create_v2q15 (broadcast_val, broadcast_val);
231235 // Scalar minus Vector
232236 for (int idx = 0 ; idx < op2_size / 2 ; idx++) {
233- mli_prv_store_2_samples (out, mli_prv_load_sub_vec2 (( const MLI_PTR (io_T)) & broadcast_val_v2, op2));
237+ mli_prv_store_2_samples (out, fx_sub_v2q15 ( broadcast_val_v2, mli_prv_load_2_samples ( op2) ));
234238 op2 += 2 ;
235239 out += 2 ;
236240 }
237241 if (op2_size & 1 ) {
238- *out++ = mli_math_sub_fx (*op1 , *op2++);
242+ *out++ = mli_math_sub_fx (broadcast_val , *op2++);
239243 }
240244 } else {
241245 // Elementwise between tensors of the same shape
@@ -264,13 +268,13 @@ static inline void __attribute__ ((always_inline)) eltwise_op_max_fx (
264268 // Simple broadcast (vector on scalar)
265269 // ==============================================
266270 if (op1_size == 1 || op2_size == 1 ) {
267- const io_T broadcast_val = (op1_size > op2_size) ? (*op2) : (*op1);
271+ const io_T broadcast_val = (op1_size > op2_size) ? (*( const io_T *) op2) : (*( const io_T *) op1);
268272 const MLI_PTR (io_T) vec = (op1_size > op2_size) ? (MLI_PTR (io_T))op1 : (MLI_PTR (io_T))op2;
269273 const int out_size = MAX (op1_size, op2_size);
270274
271- io_T broadcast_val_v2[] = { broadcast_val, broadcast_val} ;
275+ const v2q15_t broadcast_val_v2 = fx_create_v2q15 ( broadcast_val, broadcast_val) ;
272276 for (int idx = 0 ; idx < out_size / 2 ; idx++) {
273- mli_prv_store_2_samples (out, mli_prv_load_max_vec2 (vec, ( const MLI_PTR (io_T)) & broadcast_val_v2 ));
277+ mli_prv_store_2_samples (out, fx_max_v2q15 (broadcast_val_v2, mli_prv_load_2_samples (vec) ));
274278 vec += 2 ;
275279 out += 2 ;
276280 }
@@ -304,13 +308,13 @@ static inline void __attribute__ ((always_inline)) eltwise_op_min_fx (
304308 // Simple broadcast (vector on scalar)
305309 // ==============================================
306310 if (op1_size == 1 || op2_size == 1 ) {
307- const io_T broadcast_val = (op1_size > op2_size) ? (*op2) : (*op1);
311+ const io_T broadcast_val = (op1_size > op2_size) ? (*( const io_T *) op2) : (*( const io_T *) op1);
308312 const MLI_PTR (io_T) vec = (op1_size > op2_size) ? (MLI_PTR (io_T))op1 : (MLI_PTR (io_T))op2;
309313 const int out_size = MAX (op1_size, op2_size);
310314
311- io_T broadcast_val_v2[] = { broadcast_val, broadcast_val} ;
315+ const v2q15_t broadcast_val_v2 = fx_create_v2q15 ( broadcast_val, broadcast_val) ;
312316 for (int idx = 0 ; idx < out_size / 2 ; idx++) {
313- mli_prv_store_2_samples (out, mli_prv_load_min_vec2 (vec, ( const MLI_PTR (io_T)) & broadcast_val_v2 ));
317+ mli_prv_store_2_samples (out, fx_min_v2q15 (broadcast_val_v2, mli_prv_load_2_samples (vec) ));
314318 vec += 2 ;
315319 out += 2 ;
316320 }
@@ -345,7 +349,7 @@ static inline void __attribute__ ((always_inline)) eltwise_op_mul_fx (
345349 // Simple broadcast (vector on scalar)
346350 // ==============================================
347351 if (op1_size == 1 || op2_size == 1 ) {
348- const io_T broadcast_val = (op1_size > op2_size) ? (*op2) : (*op1);
352+ const io_T broadcast_val = (op1_size > op2_size) ? (*( const io_T *) op2) : (*( const io_T *) op1);
349353 const MLI_PTR (io_T) vec = (op1_size > op2_size) ? op1 : op2;
350354 const int out_size = MAX (op1_size, op2_size);
351355 v2q15_t broadcast_val_v2 = {broadcast_val, broadcast_val};
@@ -419,7 +423,7 @@ static inline void __attribute__ ((always_inline)) eltwise_op_mul_with_restricts
419423 // Simple broadcast (vector on scalar)
420424 // ==============================================
421425 if (op1_size == 1 || op2_size == 1 ) {
422- const io_T broadcast_val = (op1_size > op2_size) ? (*op2) : (*op1);
426+ const io_T broadcast_val = (op1_size > op2_size) ? (*( const io_T *) op2) : (*( const io_T *) op1);
423427 const MLI_PTR (io_T) vec = (op1_size > op2_size) ? op1 : op2;
424428 const int out_size = MAX (op1_size, op2_size);
425429 v2q15_t broadcast_val_v2 = {broadcast_val, broadcast_val};
0 commit comments