@@ -318,8 +318,8 @@ MLI_FORCE_INLINE vNx4short_t mli_prv_convert_sa8_fx16(
318318 const int16_t zero_point,
319319 const int16_t scale,
320320 const int shift) {
321- int shift_right = MAX (shift, 0 );
322- int shift_left = MAX (-shift, 0 );
321+ int shift_right = mli_math_max_fx (shift, 0 );
322+ int shift_left = mli_math_max_fx (-shift, 0 );
323323 vNx4short_t in_biased_shifted_no_zp = mli_math_sub_fx<vNx4short_t>(in_val, zero_point);
324324 vNx4int_t in_scaled = mli_math_mul_fx<vNx4short_t, vNx4int_t>(in_biased_shifted_no_zp, scale);
325325 vNx4short_t res = mli_math_cast_fx<vNx4int_t, vNx4short_t>(in_scaled, shift_right);
@@ -423,8 +423,8 @@ MLI_FORCE_INLINE void result_cast_relu_store_v(
423423
424424 accu_scaled = accu_scaled + quant_params->out_offset ;
425425
426- accu_scaled = MIN (accu_scaled, val_max_limit);
427- accu_scaled = MAX (accu_scaled, val_min_limit);
426+ accu_scaled = mli_math_min_fx (accu_scaled, val_max_limit);
427+ accu_scaled = mli_math_max_fx (accu_scaled, val_min_limit);
428428
429429 vNx4char_t out = to_vNx4char_t (accu_scaled);
430430 mli_prv_store_n_samples (o_ptr, out, num);
@@ -442,8 +442,8 @@ MLI_FORCE_INLINE void result_cast_relu_store_v(
442442
443443 vNx4char_t out = mli_math_acc_cast_fx<vNx4char_t, vNx4accshort_t>(acc, quant_params->out_shift );
444444
445- out = MIN (out, val_max_limit);
446- out = MAX (out, val_min_limit);
445+ out = mli_math_min_fx (out, val_max_limit);
446+ out = mli_math_max_fx (out, val_min_limit);
447447
448448 mli_prv_store_n_samples (o_ptr, out, num);
449449}
@@ -460,8 +460,8 @@ MLI_FORCE_INLINE void result_cast_relu_store_v(
460460
461461 vNx2short_t out = mli_math_acc_cast_fx<vNx2short_t, vNx2accint_t>(acc, quant_params->out_shift );
462462
463- out = MIN (out, val_max_limit);
464- out = MAX (out, val_min_limit);
463+ out = mli_math_min_fx (out, val_max_limit);
464+ out = mli_math_max_fx (out, val_min_limit);
465465
466466 mli_prv_store_n_samples (o_ptr, out, num);
467467}
@@ -478,32 +478,83 @@ MLI_FORCE_INLINE void result_cast_relu_store_v(
478478
479479 vNx4short_t out = mli_math_acc_cast_fx<vNx4short_t, vNx4accint_t>(acc, quant_params->out_shift );
480480
481- out = MIN (out, val_max_limit);
482- out = MAX (out, val_min_limit);
481+ out = mli_math_min_fx (out, val_max_limit);
482+ out = mli_math_max_fx (out, val_min_limit);
483+
484+ mli_prv_store_n_samples (o_ptr, out, num);
485+ }
486+
// Explicit specialization of ir_result_cast_relu_store_v for an int8_t output
// pointer and a vNx4accshort_t accumulator with s8asym output quant params.
//
// Steps performed by the body:
//   1. cast the accumulator to vNx4short_t (no shift argument is passed here),
//   2. add quant_params->out_offset,
//   3. limit the value from above by val_max_limit, then from below by
//      val_min_limit,
//   4. convert to vNx4char_t and hand it to mli_prv_store_n_samples together
//      with o_ptr and num.
//
// NOTE(review): num is presumably the number of lanes to write; confirm
// against mli_prv_store_n_samples.
487+ template <>
488+ MLI_FORCE_INLINE void ir_result_cast_relu_store_v (
489+ MLI_CONV_OUT_PTR (int8_t ) __restrict o_ptr,
490+ vNx4accshort_t acc,
491+ const s8asym_quant_specific_out_params_v* quant_params,
492+ const int16_t val_min_limit,
493+ const int16_t val_max_limit,
494+ int num) {
495+
496+ vNx4short_t accu_scaled = mli_math_acc_cast_fx<vNx4short_t, vNx4accshort_t>(acc);
497+ accu_scaled = mli_math_add_fx<vNx4short_t>(accu_scaled, quant_params->out_offset );
498+
// Upper bound first, then lower bound.
499+ accu_scaled = mli_math_min_fx (accu_scaled, val_max_limit);
500+ accu_scaled = mli_math_max_fx (accu_scaled, val_min_limit);
501+
502+ vNx4char_t out = to_vNx4char_t (accu_scaled);
503+ mli_prv_store_n_samples (o_ptr, out, num);
504+ }
505+
// Explicit specialization of ir_result_cast_relu_store_v for an int16_t output
// pointer and a vNx2accint_t accumulator with fx quant params.
//
// The accumulator is cast to vNx2short_t without a shift argument, limited
// from above by val_max_limit and from below by val_min_limit, and then passed
// to mli_prv_store_n_samples with o_ptr and num.
//
// quant_params is part of the signature but is not read in this body.
506+ template <>
507+ MLI_FORCE_INLINE void ir_result_cast_relu_store_v (
508+ MLI_CONV_OUT_PTR (int16_t ) __restrict o_ptr,
509+ vNx2accint_t acc,
510+ const fx_quant_specific_params* quant_params,
511+ const int16_t val_min_limit,
512+ const int16_t val_max_limit,
513+ int num) {
514+
515+ vNx2short_t out = mli_math_acc_cast_fx<vNx2short_t, vNx2accint_t>(acc);
516+
// Upper bound first, then lower bound.
517+ out = mli_math_min_fx (out, val_max_limit);
518+ out = mli_math_max_fx (out, val_min_limit);
519+
520+ mli_prv_store_n_samples (o_ptr, out, num);
521+ }
522+
// Explicit specialization of ir_result_cast_relu_store_v for an int16_t output
// pointer and a vNx4accint_t accumulator with fx quant params.
//
// The accumulator is cast to vNx4short_t without a shift argument, limited
// from above by val_max_limit and from below by val_min_limit, and then passed
// to mli_prv_store_n_samples with o_ptr and num.
//
// quant_params is part of the signature but is not read in this body.
// The final store and closing brace are unchanged context lines of this hunk.
523+ template <>
524+ MLI_FORCE_INLINE void ir_result_cast_relu_store_v (
525+ MLI_CONV_OUT_PTR (int16_t ) __restrict o_ptr,
526+ vNx4accint_t acc,
527+ const fx_quant_specific_params* quant_params,
528+ const int16_t val_min_limit,
529+ const int16_t val_max_limit,
530+ int num) {
531+
532+ vNx4short_t out = mli_math_acc_cast_fx<vNx4short_t, vNx4accint_t>(acc);
533+
// Upper bound first, then lower bound.
534+ out = mli_math_min_fx (out, val_max_limit);
535+ out = mli_math_max_fx (out, val_min_limit);
483536
484537 mli_prv_store_n_samples (o_ptr, out, num);
485538}
486539
// Generic ir_rnn_result_requantize for fx quant params.
//
// Form after this change ('+' lines): takes one params pointer and uses
// params->out_shift as the shift amount. A positive value becomes shift_right,
// a negative value becomes shift_left (its magnitude); the other is 0.
// The accumulator is first shifted left via mli_math_asl_fx and the result is
// then shifted right via mli_math_asr_rnd_fx, whose value is returned.
//
// Form before this change ('-' lines): took current_params, next_params and
// krn_idx, and used current_params->out_shift - next_params->out_shift as the
// shift amount.
//
// NOTE(review): mli_math_asr_rnd_fx presumably rounds on the right shift;
// confirm against its definition.
487540template <typename acc_T>
488- MLI_FORCE_INLINE acc_T ir_rnn_result_requantize (const acc_T acc, const fx_quant_specific_params* current_params,
489- const fx_quant_specific_params* next_params, int krn_idx) {
490- const int shift = current_params->out_shift - next_params->out_shift ;
491- int shift_right = MAX (shift, 0 );
492- int shift_left = MAX (-shift, 0 );
541+ MLI_FORCE_INLINE acc_T ir_rnn_result_requantize (
542+ const acc_T acc,
543+ const fx_quant_specific_params* params) {
544+ const int in_to_ir_shift = params->out_shift ;
// Split the signed shift into two non-negative amounts; at most one is non-zero.
545+ int shift_right = mli_math_max_fx (in_to_ir_shift, 0 );
546+ int shift_left = mli_math_max_fx (-in_to_ir_shift, 0 );
493547 acc_T acc_shifted = mli_math_asl_fx (acc, shift_left);
494548 return mli_math_asr_rnd_fx<acc_T, int >(acc_shifted, shift_right);
495549}
496550
497551template <>
498552MLI_FORCE_INLINE vNx4accshort_t ir_rnn_result_requantize (
499553 const vNx4accshort_t acc,
500- const s8asym_quant_specific_params* current_params,
501- const s8asym_quant_specific_params* next_params, int krn_idx) {
502-
503- MLI_ASSERT (krn_idx == 0 );
554+ const s8asym_quant_specific_params* params) {
504555
505- const int32_t mul = current_params ->out_mul / next_params-> weight_scales [ 0 ] ;
506- const int shift = current_params ->out_shift - next_params-> weight_shifts [ 0 ] ;
556+ const int32_t mul = params ->out_mul ;
557+ const int in_to_ir_shift = params ->out_shift ;
507558
508559 int mul_norm = mli_math_norm_fx<int32_t , int32_t >(mul);
509560 int32_t mul_shifted = mul << mul_norm;
@@ -516,7 +567,7 @@ MLI_FORCE_INLINE vNx4accshort_t ir_rnn_result_requantize(
516567
517568 constexpr int mul_high_shift = 32 ;
518569 constexpr int max_int_shift = 30 ;
519- vNx4int_t total_shift = mli_math_add_fx<vNx4int_t>(acc_norm, (mul_norm - mul_high_shift + shift ));
570+ vNx4int_t total_shift = mli_math_add_fx<vNx4int_t>(acc_norm, (mul_norm - mul_high_shift + in_to_ir_shift ));
520571 vNx4int_t shift_left = mli_math_max_fx (-total_shift, 0 );
521572 vNx4int_t shift_right = mli_math_max_fx (total_shift, 0 );
522573
0 commit comments