Skip to content

Commit 6b8f858

Browse files
Ahmed Abdelhakim (Si-Vision) authored and JaccovG committed
eltwise max/min negative shift
1 parent 4c16f66 commit 6b8f858

File tree

3 files changed

+44
-40
lines changed

3 files changed

+44
-40
lines changed

lib/src/kernels/eltwise/impl/mli_krn_eltwise_ref.h

Lines changed: 14 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,11 @@
2121
#define INT32_TO_INT16 16
2222
#define IN_SCALE_SHIFT 32
2323
#define MUL_MAX_SHIFT 31
24+
/*
25+
* For max/min, shifting by more than 23 bits is not needed
26+
* as the scaled result ((max - in_offset) * scale) is limited to 24 bits, including the sign bit.
27+
*/
28+
#define MAX_MIN_UPPER_LIMIT_SHIFT 23
2429

2530
namespace mli {
2631
namespace krn {
@@ -264,18 +269,16 @@ void eltwise_prepare_and_run(
264269
shift2 = in2->el_params.sa.scale_frac_bits.mem.i8;
265270
shift_out = out->el_params.sa.scale_frac_bits.mem.i8;
266271
if (func_type == ELTWISE_MAX || func_type == ELTWISE_MIN) {
267-
in_scale_fx1 = mli_math_asr_rnd_fx<int32_t>(scale_1,
268-
(int32_t) shift1 - frac_bits_fx16);
269-
out_scale_fx = mli_math_asr_rnd_fx<int32_t>(scale_out,
270-
(int32_t) shift_out - frac_bits_fx16);
271-
scale_factor1 = mli_math_asr_rnd_fx<int32_t>(in_scale_fx1, -INT32_TO_INT16);
272-
scale_factor1 /= out_scale_fx;
273-
post_op_shift = INT32_TO_INT16;
274-
int norm1 = (scale_factor1 != 0) ? mli_math_norm_fx<int32_t, int>(scale_factor1) : 0;
275-
int shift = MAX(INT32_TO_INT16 - norm1, 0);
276-
scale16_1 = mli_math_cast_fx<int32_t, int16_t>(scale_factor1, shift);
277-
scale16_2 = scale16_1;
272+
int32_t scale_factor = mli_math_asl_fx<int32_t>(scale_1, INT32_TO_INT16);
273+
scale_factor = scale_factor / scale_out;
274+
post_op_shift = INT32_TO_INT16 + shift1 - shift_out;
275+
int shift;
276+
scale16_1 = mli_math_norm_cast_fx<int32_t, int16_t>(scale_factor, &shift);
277+
post_op_shift -= shift;
278+
shift = MAX(post_op_shift - MAX_MIN_UPPER_LIMIT_SHIFT, 0) + MIN(MUL_MAX_SHIFT + post_op_shift, 0);
279+
scale16_1 = mli_math_asr_rnd_fx<int16_t>(scale16_1, shift);
278280
post_op_shift -= shift;
281+
scale16_2 = scale16_1;
279282
} else if (func_type == ELTWISE_MUL) {
280283
int shift;
281284
scale_factor1 = scale_1 * scale_2;

lib/src/kernels/eltwise/impl/mli_krn_eltwise_vdsp.h

Lines changed: 27 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -29,8 +29,8 @@ const int unroll_factor[2][5] = {
2929
/* ELTWISE_ADD_CONVERT = */ 1,
3030
/* ELTWISE_SUB_CONVERT = */ 1,
3131
/* ELTWISE_MUL_CONVERT = */ 4,
32-
/* ELTWISE_MAX_CONVERT = */ 3,
33-
/* ELTWISE_MIN_CONVERT = */ 3
32+
/* ELTWISE_MAX_CONVERT = */ 4,
33+
/* ELTWISE_MIN_CONVERT = */ 4
3434
}
3535
};
3636

@@ -373,7 +373,7 @@ MLI_FORCE_INLINE vNx2short_t eltwise_perform_operation<vNx2short_t, vNx2short_t,
373373
vNx2short_t res;
374374
res = mli_math_max_fx(op1, op2);
375375
if (post_op_shift > 0) {
376-
res = mli_math_asr_rnd_fx(res, post_op_shift);
376+
res = mli_math_asr_rnd_fx(res, post_op_shift);
377377
} else {
378378
res = mli_math_asl_fx(res, -post_op_shift);
379379
}
@@ -417,16 +417,17 @@ MLI_FORCE_INLINE vNx4char_t eltwise_perform_operation<vNx4char_t, vNx4char_t, EL
417417
const int pre_op_shift2,
418418
const int post_op_shift) {
419419
vNx4char_t res;
420-
int32_t acc_init = (out_offset << post_op_shift) - scale_factor1 * in_offset1;
421-
#ifdef ROUND_UP
422-
acc_init += ((1 << post_op_shift) >> 1); // rounding half up //
423-
#else
424-
#error Rounding mode not supported
425-
#endif
426-
vNx4accint_t accu = mli_math_init_accu<int32_t, vNx4accint_t>(acc_init);
420+
constexpr int mul_hi_shift = 16;
421+
int shift = post_op_shift - mul_hi_shift;
422+
int shift_left = mli_math_max_fx(1 - shift, 0);
423+
int shift_right = mli_math_max_fx(shift, 1);
427424
vNx4short_t max = to_vNx4short_t(mli_math_max_fx(op1, op2));
428-
accu = mli_math_mac_fx(accu, max, scale_factor1);
429-
res = mli_math_acc_cast_fx<vNx4char_t, vNx4accint_t, false>(accu, post_op_shift);
425+
max = mli_math_sub_fx(max, (vNx4short_t)in_offset1);
426+
max = mli_math_asl_fx(max, shift_left);
427+
vNx4short_t max_scaled = mli_math_mul_fx_high(max, scale_factor1);
428+
max_scaled = mli_math_asr_rnd_fx(max_scaled, shift_right);
429+
max_scaled = mli_math_add_fx(max_scaled, (vNx4short_t) out_offset);
430+
res = mli_math_cast_fx<vNx4short_t, vNx4char_t>(max_scaled);
430431
return res;
431432
}
432433

@@ -491,17 +492,17 @@ MLI_FORCE_INLINE vNx4char_t eltwise_perform_operation<vNx4char_t, vNx4char_t, EL
491492
const int pre_op_shift2,
492493
const int post_op_shift) {
493494
vNx4char_t res;
494-
int32_t acc_init = (out_offset << post_op_shift) - scale_factor1 * in_offset1;
495-
496-
#ifdef ROUND_UP
497-
acc_init += ((1 << post_op_shift) >> 1); // rounding half up //
498-
#else
499-
#error Rounding mode not supported
500-
#endif
501-
vNx4accint_t accu = mli_math_init_accu<int32_t, vNx4accint_t>(acc_init);
495+
constexpr int mul_hi_shift = 16;
496+
int shift = post_op_shift - mul_hi_shift;
497+
int shift_left = mli_math_max_fx(1 - shift, 0);
498+
int shift_right = mli_math_max_fx(shift, 1);
502499
vNx4short_t max = to_vNx4short_t(mli_math_min_fx(op1, op2));
503-
accu = mli_math_mac_fx(accu, max, scale_factor1);
504-
res = mli_math_acc_cast_fx<vNx4char_t, vNx4accint_t, false>(accu, post_op_shift);
500+
max = mli_math_sub_fx(max, (vNx4short_t)in_offset1);
501+
max = mli_math_asl_fx(max, shift_left);
502+
vNx4short_t max_scaled = mli_math_mul_fx_high(max, scale_factor1);
503+
max_scaled = mli_math_asr_rnd_fx(max_scaled, shift_right);
504+
max_scaled = mli_math_add_fx(max_scaled, (vNx4short_t) out_offset);
505+
res = mli_math_cast_fx<vNx4short_t, vNx4char_t>(max_scaled);
505506
return res;
506507

507508
}
@@ -572,8 +573,8 @@ MLI_FORCE_INLINE void eltwise_innerloop<int16_t, ELTWISE_MAX, false>(
572573
int idx2,
573574
int idx_out,
574575
const int count,
575-
int16_t op1_s,
576-
int16_t op2_s,
576+
int16_t op1_s,
577+
int16_t op2_s,
577578
const bool scalar_op1,
578579
const bool scalar_op2,
579580
const int16_t in_offset1,
@@ -628,8 +629,8 @@ MLI_FORCE_INLINE void eltwise_innerloop<int16_t, ELTWISE_MIN, false>(
628629
int idx2,
629630
int idx_out,
630631
const int count,
631-
int16_t op1_s,
632-
int16_t op2_s,
632+
int16_t op1_s,
633+
int16_t op2_s,
633634
const bool scalar_op1,
634635
const bool scalar_op2,
635636
const int16_t in_offset1,

user_tests/tests/mli_krn_eltwise/tests_mli_krn_eltwise.cc

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -57,9 +57,9 @@ const crc32_calc test_1_chksum_sa8{ 0xd48163e
5757
test_5_chksum_sa8{ 0x9A14384C },
5858
test_6_chksum_fx16{ 0xfc026def }, test_6_chksum_sa8{ 0x3a54561 },
5959
test_7_chksum_fx16{ 0x488ed527 }, test_7_chksum_sa8{ 0xD4B7515B },
60-
test_8_chksum_fx16{ 0x68889D84 }, test_8_chksum_sa8{ 0x168B3B32 },
61-
test_9_chksum_fx16{ 0x9417F3D7 }, test_9_chksum_sa8{ 0xA83B910E },
62-
test_10_chksum_fx16{ 0xD728E430 }, test_10_chksum_sa8{ 0xE34DA6B0 },
60+
test_8_chksum_fx16{ 0x68889D84 }, test_8_chksum_sa8{ 0x2D86F301 },
61+
test_9_chksum_fx16{ 0x9417F3D7 }, test_9_chksum_sa8{ 0x351016DF },
62+
test_10_chksum_fx16{ 0xD728E430 }, test_10_chksum_sa8{ 0xDC1A832D },
6363
test_11_chksum_fx16{ 0xBF03F2E0 }, test_11_chksum_sa8{ 0xD36B7E94 };
6464

6565
// Platform Specific CRC Results

0 commit comments

Comments
 (0)