Skip to content

Commit 0c09acd

Browse files
committed
[rnn_dense] fix rnn dense accuracy issues for no_guard_bits configs
1 parent 5e60c47 commit 0c09acd

File tree

1 file changed

+23
-1
lines changed

1 file changed

+23
-1
lines changed

lib/src/bricks/impl/mli_krn_rnn_dense_op_vdsp.h

Lines changed: 23 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -94,6 +94,27 @@ static inline void rnn_dense_op_stacked(
9494
dense_out_ptr -= gates_num * out_elements;
9595
}
9696

97+
// Accumulator addition, vNx2accint_t overload: returns mli_math_add(L, R)
// unchanged. It gives callers one name (mli_math_add_accus) to use for every
// accumulator type handled by this overload set.
MLI_FORCE_INLINE vNx2accint_t mli_math_add_accus(vNx2accint_t L, vNx2accint_t R) {
98+
return mli_math_add(L, R);
99+
}
100+
101+
// Accumulator addition, vNx4accint_t overload: returns mli_math_add(L, R)
// unchanged, with no configuration-specific handling.
MLI_FORCE_INLINE vNx4accint_t mli_math_add_accus(vNx4accint_t L, vNx4accint_t R) {
102+
return mli_math_add(L, R);
103+
}
104+
105+
// Accumulator addition, vNx4accshort_t overload.
//
// When __Xvec_guard_bit_option == 0, the two accumulators are not added
// directly: each is converted to a vNx4short_t vector, the vectors are added
// with mli_math_add_fx, and the sum is converted back into an accumulator.
// For any other value of the option the call forwards to mli_math_add(L, R).
//
// NOTE(review): the commit title ties this to accuracy problems on
// no_guard_bits configs; presumably mli_math_add_fx saturates where a direct
// accumulator add without guard bits would wrap -- confirm against the
// mli_math helper implementations.
MLI_FORCE_INLINE vNx4accshort_t mli_math_add_accus(vNx4accshort_t L, vNx4accshort_t R) {
106+
#if (__Xvec_guard_bit_option == 0)
107+
// Convert both accumulators to plain short vectors.
vNx4short_t L_short = mli_math_acc_cast_fx<vNx4short_t, vNx4accshort_t>(L);
108+
vNx4short_t R_short = mli_math_acc_cast_fx<vNx4short_t, vNx4accshort_t>(R);
109+
110+
// Add in the short-vector domain instead of the accumulator domain.
vNx4short_t res = mli_math_add_fx<vNx4short_t>(L_short, R_short);
111+
112+
// Build the returned accumulator from the sum; the second operand is a zero vector.
return mli_math_init_accu_add<vNx4short_t, vNx4accshort_t>(res, (vNx4short_t)0);
113+
#else
114+
// Any other guard-bit option value: add the accumulators directly.
return mli_math_add(L, R);
115+
#endif
116+
}
117+
97118
template <typename io_T, typename w_T, typename b_T, typename acc_T, typename quant_T>
98119
static inline void rnn_dense_op(
99120
const MLI_PTR(io_T) __restrict * inputs,
@@ -125,7 +146,8 @@ static inline void rnn_dense_op(
125146
output_params = adjust_quant_params_v(&in_to_out_quant_params[idx], 0);
126147
accu = dotprod_inputzp_1D_v(inputs[idx], &weights[idx][o_idx], accu, in_elements[idx],
127148
1, w_ch_out_mem_strides[idx], &in_to_out_quant_params[idx]);
128-
accu = mli_math_add(accu, prev_step);
149+
150+
accu = mli_math_add_accus(accu, prev_step);
129151

130152
if(inputs_num - idx != 1) {
131153
mli::krn::ref::adjust_quant_params(&in_to_out_quant_params[idx], o_idx);

0 commit comments

Comments
 (0)