@@ -503,6 +503,25 @@ MLI_FORCE_INLINE void ir_result_cast_relu_store_v(
503503 mli_prv_store_n_samples (o_ptr, out, num);
504504}
505505
506+ template <>
507+ MLI_FORCE_INLINE void ir_result_cast_relu_store_v (
508+ MLI_CONV_OUT_PTR (int8_t ) __restrict o_ptr,
509+ vNx4int_t acc,
510+ const s8asym_quant_specific_out_params_v* quant_params,
511+ const int16_t val_min_limit,
512+ const int16_t val_max_limit,
513+ int num) {
514+
515+ vNx4short_t accu_scaled = mli_math_cast_fx<vNx4int_t, vNx4short_t>(acc);
516+ accu_scaled = mli_math_add_fx<vNx4short_t>(accu_scaled, quant_params->out_offset );
517+
518+ accu_scaled = mli_math_min_fx (accu_scaled, val_max_limit);
519+ accu_scaled = mli_math_max_fx (accu_scaled, val_min_limit);
520+
521+ vNx4char_t out = to_vNx4char_t (accu_scaled);
522+ mli_prv_store_n_samples (o_ptr, out, num);
523+ }
524+
506525template <>
507526MLI_FORCE_INLINE void ir_result_cast_relu_store_v (
508527 MLI_CONV_OUT_PTR (int16_t ) __restrict o_ptr,
@@ -537,19 +556,19 @@ MLI_FORCE_INLINE void ir_result_cast_relu_store_v(
537556 mli_prv_store_n_samples (o_ptr, out, num);
538557}
539558
540- template <typename acc_T>
541- MLI_FORCE_INLINE acc_T ir_rnn_result_requantize (
559+ template <typename acc_T, typename out_T=acc_T >
560+ MLI_FORCE_INLINE out_T ir_rnn_result_requantize (
542561 const acc_T acc,
543562 const fx_quant_specific_params* params) {
544563 const int in_to_ir_shift = params->out_shift ;
545564 int shift_right = mli_math_max_fx (in_to_ir_shift, 0 );
546565 int shift_left = mli_math_max_fx (-in_to_ir_shift, 0 );
547- acc_T acc_shifted = mli_math_asl_fx (acc, shift_left);
548- return mli_math_asr_rnd_fx<acc_T , int >(acc_shifted, shift_right);
566+ out_T acc_shifted = mli_math_asl_fx (acc, shift_left);
567+ return mli_math_asr_rnd_fx<out_T , int >(acc_shifted, shift_right);
549568}
550569
551570template <>
552- MLI_FORCE_INLINE vNx4accshort_t ir_rnn_result_requantize (
571+ MLI_FORCE_INLINE vNx4int_t ir_rnn_result_requantize (
553572 const vNx4accshort_t acc,
554573 const s8asym_quant_specific_params* params) {
555574
@@ -578,28 +597,7 @@ MLI_FORCE_INLINE vNx4accshort_t ir_rnn_result_requantize(
578597 acc_shifted = mli_math_asr_rnd_fx (acc_shifted, shift_right);
579598 acc_shifted = mli_math_asl_fx (acc_shifted, shift_left);
580599
581- #if (__Xvec_guard_bit_option == 0)
582- vNx4short_t acc_short = mli_math_cast_fx<vNx4int_t, vNx4short_t>(acc_shifted);
583- vNx4accshort_t res = mli_math_init_accu_add<vNx4short_t, vNx4accshort_t>(acc_short, (vNx4short_t)0 );
584- #else
585- vNx4int_t norm;
586- vNx4short_t acc_short = mli_math_norm_cast_fx</* left_shift*/ false >(acc_shifted , &norm);
587-
588- constexpr int guard_bits = 8 ;
589- vNx4int_t mask = (1 << norm) - 1 ;
590- vNx4int_t acc_shifted_low = acc_shifted & mask;
591- // If the norm is more than the number of guardbits,
592- // so the masked_acc has to be shifted, since the result is shifted with max shift equals to number of guardbits.
593- vNx4int_t mask_shift = mli_math_max_fx (norm - guard_bits, 0 );
594- acc_shifted_low = mli_math_asr_fx (acc_shifted_low, mask_shift);
595-
596- norm = mli_math_min_fx (norm, guard_bits);
597- vNx4accshort_t res = mli_math_init_accu_add<vNx4short_t, vNx4accshort_t>(acc_short, (vNx4short_t)0 );
598- res = mli_math_asl_fx<vNx4accshort_t, vNx4short_t>(res, to_vNx4short_t (norm));
599- res = mli_math_add (res, to_vNx4short_t (acc_shifted_low));
600- #endif
601-
602- return res;
600+ return acc_shifted;
603601}
604602
605603} // namespace vdsp
0 commit comments