@@ -318,8 +318,8 @@ MLI_FORCE_INLINE vNx4short_t mli_prv_convert_sa8_fx16(
318318 const int16_t zero_point,
319319 const int16_t scale,
320320 const int shift) {
321- int shift_right = MAX (shift, 0 );
322- int shift_left = MAX (-shift, 0 );
321+ int shift_right = mli_math_max_fx (shift, 0 );
322+ int shift_left = mli_math_max_fx (-shift, 0 );
323323 vNx4short_t in_biased_shifted_no_zp = mli_math_sub_fx<vNx4short_t>(in_val, zero_point);
324324 vNx4int_t in_scaled = mli_math_mul_fx<vNx4short_t, vNx4int_t>(in_biased_shifted_no_zp, scale);
325325 vNx4short_t res = mli_math_cast_fx<vNx4int_t, vNx4short_t>(in_scaled, shift_right);
@@ -423,8 +423,8 @@ MLI_FORCE_INLINE void result_cast_relu_store_v(
423423
424424 accu_scaled = accu_scaled + quant_params->out_offset ;
425425
426- accu_scaled = MIN (accu_scaled, val_max_limit);
427- accu_scaled = MAX (accu_scaled, val_min_limit);
426+ accu_scaled = mli_math_min_fx (accu_scaled, val_max_limit);
427+ accu_scaled = mli_math_max_fx (accu_scaled, val_min_limit);
428428
429429 vNx4char_t out = to_vNx4char_t (accu_scaled);
430430 mli_prv_store_n_samples (o_ptr, out, num);
@@ -442,8 +442,8 @@ MLI_FORCE_INLINE void result_cast_relu_store_v(
442442
443443 vNx4char_t out = mli_math_acc_cast_fx<vNx4char_t, vNx4accshort_t>(acc, quant_params->out_shift );
444444
445- out = MIN (out, val_max_limit);
446- out = MAX (out, val_min_limit);
445+ out = mli_math_min_fx (out, val_max_limit);
446+ out = mli_math_max_fx (out, val_min_limit);
447447
448448 mli_prv_store_n_samples (o_ptr, out, num);
449449}
@@ -460,8 +460,8 @@ MLI_FORCE_INLINE void result_cast_relu_store_v(
460460
461461 vNx2short_t out = mli_math_acc_cast_fx<vNx2short_t, vNx2accint_t>(acc, quant_params->out_shift );
462462
463- out = MIN (out, val_max_limit);
464- out = MAX (out, val_min_limit);
463+ out = mli_math_min_fx (out, val_max_limit);
464+ out = mli_math_max_fx (out, val_min_limit);
465465
466466 mli_prv_store_n_samples (o_ptr, out, num);
467467}
@@ -478,32 +478,83 @@ MLI_FORCE_INLINE void result_cast_relu_store_v(
478478
479479 vNx4short_t out = mli_math_acc_cast_fx<vNx4short_t, vNx4accint_t>(acc, quant_params->out_shift );
480480
481- out = MIN (out, val_max_limit);
482- out = MAX (out, val_min_limit);
481+ out = mli_math_min_fx (out, val_max_limit);
482+ out = mli_math_max_fx (out, val_min_limit);
483+
484+ mli_prv_store_n_samples (o_ptr, out, num);
485+ }
486+
// Explicit specialization of ir_result_cast_relu_store_v for an int8_t output
// pointer and a vNx4accshort_t accumulator.
// Steps: cast the accumulator to vNx4short_t, add quant_params->out_offset,
// limit the values from above by val_max_limit and from below by val_min_limit,
// convert to vNx4char_t and hand the result to mli_prv_store_n_samples together
// with o_ptr and num.
// NOTE(review): the accumulator cast here takes no shift argument (unlike the
// result_cast_relu_store_v variants visible earlier in this diff) - presumably
// the accumulator has already been requantized by the caller; confirm.
487+ template <>
488+ MLI_FORCE_INLINE void ir_result_cast_relu_store_v (
489+ MLI_CONV_OUT_PTR (int8_t ) __restrict o_ptr,
490+ vNx4accshort_t acc,
491+ const s8asym_quant_specific_out_params_v* quant_params,
492+ const int16_t val_min_limit,
493+ const int16_t val_max_limit,
494+ int num) {
495+
496+ vNx4short_t accu_scaled = mli_math_acc_cast_fx<vNx4short_t, vNx4accshort_t>(acc);
497+ accu_scaled = mli_math_add_fx<vNx4short_t>(accu_scaled, quant_params->out_offset );
498+
// Apply the upper limit first, then the lower limit.
499+ accu_scaled = mli_math_min_fx (accu_scaled, val_max_limit);
500+ accu_scaled = mli_math_max_fx (accu_scaled, val_min_limit);
501+
502+ vNx4char_t out = to_vNx4char_t (accu_scaled);
503+ mli_prv_store_n_samples (o_ptr, out, num);
504+ }
505+
// Explicit specialization of ir_result_cast_relu_store_v for an int16_t output
// pointer and a vNx2accint_t accumulator.
// Steps: cast the accumulator to vNx2short_t (no shift argument is passed),
// limit the values from above by val_max_limit and from below by val_min_limit,
// and hand the result to mli_prv_store_n_samples together with o_ptr and num.
// quant_params is not read in this specialization.
506+ template <>
507+ MLI_FORCE_INLINE void ir_result_cast_relu_store_v (
508+ MLI_CONV_OUT_PTR (int16_t ) __restrict o_ptr,
509+ vNx2accint_t acc,
510+ const fx_quant_specific_params* quant_params,
511+ const int16_t val_min_limit,
512+ const int16_t val_max_limit,
513+ int num) {
514+
515+ vNx2short_t out = mli_math_acc_cast_fx<vNx2short_t, vNx2accint_t>(acc);
516+
// Apply the upper limit first, then the lower limit.
517+ out = mli_math_min_fx (out, val_max_limit);
518+ out = mli_math_max_fx (out, val_min_limit);
519+
520+ mli_prv_store_n_samples (o_ptr, out, num);
521+ }
522+
// Explicit specialization of ir_result_cast_relu_store_v for an int16_t output
// pointer and a vNx4accint_t accumulator.
// Steps: cast the accumulator to vNx4short_t (no shift argument is passed),
// limit the values from above by val_max_limit and from below by val_min_limit,
// and hand the result to mli_prv_store_n_samples together with o_ptr and num.
// quant_params is not read in this specialization.
523+ template <>
524+ MLI_FORCE_INLINE void ir_result_cast_relu_store_v (
525+ MLI_CONV_OUT_PTR (int16_t ) __restrict o_ptr,
526+ vNx4accint_t acc,
527+ const fx_quant_specific_params* quant_params,
528+ const int16_t val_min_limit,
529+ const int16_t val_max_limit,
530+ int num) {
531+
532+ vNx4short_t out = mli_math_acc_cast_fx<vNx4short_t, vNx4accint_t>(acc);
533+
// Apply the upper limit first, then the lower limit.
534+ out = mli_math_min_fx (out, val_max_limit);
535+ out = mli_math_max_fx (out, val_min_limit);
483536
484537 mli_prv_store_n_samples (o_ptr, out, num);
485538}
486539
// Generic requantization of an accumulator value of type acc_T.
// In the added ('+') lines the shift is read from params->out_shift and split
// into two non-negative amounts: shift_right (used when the shift is positive)
// and shift_left (used when the shift is negative). The accumulator is first
// shifted left by shift_left and the result is then passed to
// mli_math_asr_rnd_fx with shift_right; that value is returned.
// The removed ('-') lines show the previous signature, which derived the shift
// from the difference of two parameter sets and took an extra krn_idx argument.
// NOTE(review): the "_rnd" suffix suggests the right shift rounds - confirm
// against the mli_math_asr_rnd_fx definition.
487540template <typename acc_T>
488- MLI_FORCE_INLINE acc_T ir_rnn_result_requantize (const acc_T acc, const fx_quant_specific_params* current_params,
489- const fx_quant_specific_params* next_params, int krn_idx) {
490- const int shift = current_params->out_shift - next_params->out_shift ;
491- int shift_right = MAX (shift, 0 );
492- int shift_left = MAX (-shift, 0 );
541+ MLI_FORCE_INLINE acc_T ir_rnn_result_requantize (
542+ const acc_T acc,
543+ const fx_quant_specific_params* params) {
544+ const int in_to_ir_shift = params->out_shift ;
545+ int shift_right = mli_math_max_fx (in_to_ir_shift, 0 );
546+ int shift_left = mli_math_max_fx (-in_to_ir_shift, 0 );
493547 acc_T acc_shifted = mli_math_asl_fx (acc, shift_left);
494548 return mli_math_asr_rnd_fx<acc_T, int >(acc_shifted, shift_right);
495549}
496550
497551template <>
498552MLI_FORCE_INLINE vNx4accshort_t ir_rnn_result_requantize (
499553 const vNx4accshort_t acc,
500- const s8asym_quant_specific_params* current_params,
501- const s8asym_quant_specific_params* next_params, int krn_idx) {
502-
503- MLI_ASSERT (krn_idx == 0 );
554+ const s8asym_quant_specific_params* params) {
504555
505- const int32_t mul = current_params ->out_mul / next_params-> weight_scales [ 0 ] ;
506- const int shift = current_params ->out_shift - next_params-> weight_shifts [ 0 ] ;
556+ const int32_t mul = params ->out_mul ;
557+ const int in_to_ir_shift = params ->out_shift ;
507558
508559 int mul_norm = mli_math_norm_fx<int32_t , int32_t >(mul);
509560 int32_t mul_shifted = mul << mul_norm;
@@ -512,14 +563,42 @@ MLI_FORCE_INLINE vNx4accshort_t ir_rnn_result_requantize(
512563 vNx4int_t acc_norm = mli_math_norm_fx<vNx4int_t, vNx4int_t>(acc_int);
513564 acc_int = mli_math_asl_fx<vNx4int_t, vNx4int_t>(acc_int, acc_norm);
514565
515- vNx4int_t total_shift = mli_math_add_fx<vNx4int_t>(acc_norm, (mul_norm + shift));
516566 vNx4int_t acc_scaled = mli_math_mul_fx_high (acc_int, mul_shifted);
517- vNx4int_t acc_shifted = mli_math_asr_rnd_fx (acc_scaled, total_shift);
518567
568+ constexpr int mul_high_shift = 32 ;
569+ constexpr int max_int_shift = 30 ;
570+ vNx4int_t total_shift = mli_math_add_fx<vNx4int_t>(acc_norm, (mul_norm - mul_high_shift + in_to_ir_shift));
571+ vNx4int_t shift_left = mli_math_max_fx (-total_shift, 0 );
572+ vNx4int_t shift_right = mli_math_max_fx (total_shift, 0 );
573+
574+ vNx4int_t preshift = mli_math_max_fx (shift_right - max_int_shift, 0 );
575+ shift_right = shift_right - preshift;
576+
577+ vNx4int_t acc_shifted = mli_math_asr_fx (acc_scaled, preshift);
578+ acc_shifted = mli_math_asr_rnd_fx (acc_shifted, shift_right);
579+ acc_shifted = mli_math_asl_fx (acc_shifted, shift_left);
580+
581+ #if (__Xvec_guard_bit_option == 0)
582+ vNx4short_t acc_short = mli_math_cast_fx<vNx4int_t, vNx4short_t>(acc_shifted);
583+ vNx4accshort_t res = mli_math_init_accu_add<vNx4short_t, vNx4accshort_t>(acc_short, (vNx4short_t)0 );
584+ #else
519585 vNx4int_t norm;
520586 vNx4short_t acc_short = mli_math_norm_cast_fx</* left_shift*/ false >(acc_shifted , &norm);
587+
588+ constexpr int guard_bits = 8 ;
589+ vNx4int_t mask = (1 << norm) - 1 ;
590+ vNx4int_t acc_shifted_low = acc_shifted & mask;
592+ // If norm exceeds the number of guard bits, the masked low part of the accumulator
593+ // must be shifted right by the difference, since the result is shifted left by at most guard_bits.
593+ vNx4int_t mask_shift = mli_math_max_fx (norm - guard_bits, 0 );
594+ acc_shifted_low = mli_math_asr_fx (acc_shifted_low, mask_shift);
595+
596+ norm = mli_math_min_fx (norm, guard_bits);
521597 vNx4accshort_t res = mli_math_init_accu_add<vNx4short_t, vNx4accshort_t>(acc_short, (vNx4short_t)0 );
522598 res = mli_math_asl_fx<vNx4accshort_t, vNx4short_t>(res, to_vNx4short_t (norm));
599+ res = mli_math_add (res, to_vNx4short_t (acc_shifted_low));
600+ #endif
601+
523602 return res;
524603}
525604
0 commit comments