 #include "mli_prv_quant.h"
 #include "mli_prv_tensor.h"
 #include "mli_types.h"
+#include <math.h>
+
+template <typename in_T, typename out_T, typename acc_T>
+MLI_FORCE_INLINE void calc_convert(const MLI_PTR(in_T) src_tensor_arr,
+                                   MLI_OUT_PTR(out_T) dst_tensor_arr,
+                                   const int16_t in_zp, const int16_t scale,
+                                   const int16_t scale_shift, const int16_t out_zp) {
+    if (std::is_same<acc_T, int64_t>::value) {
+        const int mul_hi_shift = 32;
+        int32_t src_in_zp = mli_math_sub_fx<int32_t>(*src_tensor_arr, in_zp);
+        int32_t src_norm = mli_math_norm_fx<int32_t, int32_t>(src_in_zp);
+        src_in_zp = mli_math_asl_fx<int32_t>(src_in_zp, src_norm);
+
+        int32_t scale_norm = mli_math_norm_fx<int32_t, int32_t>((int32_t) scale);
+        int32_t scale32 = mli_math_asl_fx<int32_t>((int32_t) scale, scale_norm);
+
+        int64_t dst_acc = mli_math_mul_fx<int32_t, int64_t>(src_in_zp, scale32);
+        int32_t acc_hi = dst_acc >> mul_hi_shift;
+
+        int32_t dst_acc_shf_casted = mli_math_asr_rnd_fx<int32_t>(acc_hi, scale_shift + scale_norm + src_norm - mul_hi_shift);
+        int32_t dst_val = mli_math_add_fx<int32_t>(dst_acc_shf_casted, out_zp);
+        *dst_tensor_arr = mli_math_cast_fx<int32_t, out_T>(dst_val, 0);
+    } else {
+        int16_t src_in_zp = mli_math_sub_fx<int16_t>(*src_tensor_arr, in_zp);
+        acc_T dst_acc = mli_math_mul_fx<int16_t, acc_T>(src_in_zp, scale);
+        acc_T dst_acc_shf_casted = mli_math_asr_rnd_fx<acc_T>(dst_acc, scale_shift);
+        acc_T dst_val = mli_math_add_fx<acc_T>(dst_acc_shf_casted, out_zp);
+        *dst_tensor_arr = mli_math_cast_fx<acc_T, out_T>(dst_val, 0);
+    }
+}
+
 
 namespace mli {
 namespace hlp {
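Note: the int64_t branch of the new calc_convert first normalizes both the zero-point-adjusted input and the 16-bit scale so the 32x32 multiply uses the full dynamic range, keeps only the high 32 bits of the 64-bit product, and then compensates for the two normalizations and the dropped low 32 bits in the rounding shift (hence scale_shift + scale_norm + src_norm - 32). A minimal standalone sketch of the same arithmetic with plain C integer operations instead of the mli_math_* helpers follows; norm32, asr_rnd and requant64 are illustrative names only, and the final saturating cast is omitted for brevity:

    #include <stdint.h>

    /* Count how many bits a 32-bit value can be shifted left without overflow. */
    static int norm32(int32_t v) {
        int n = 0;
        while (n < 31 && v >= -(1 << 30) && v < (1 << 30)) {
            v *= 2;
            n++;
        }
        return n;
    }

    /* Arithmetic shift right with rounding (half up); a negative shift acts as a
     * left shift. Assumes -31 <= shift <= 31 for this sketch. */
    static int32_t asr_rnd(int32_t v, int shift) {
        if (shift <= 0)
            return (int32_t)((uint32_t)v << -shift);
        return (int32_t)((v + ((int32_t)1 << (shift - 1))) >> shift);
    }

    /* Requantize one value through a 64-bit accumulator. */
    static int32_t requant64(int32_t src, int16_t in_zp, int16_t scale,
                             int16_t scale_shift, int16_t out_zp) {
        int32_t s = src - in_zp;
        int s_norm = norm32(s);                    /* normalize the input ...          */
        s = (int32_t)((uint32_t)s << s_norm);

        int sc_norm = norm32((int32_t)scale);      /* ...and the scale for precision   */
        int32_t scale32 = (int32_t)((uint32_t)scale << sc_norm);

        int64_t acc = (int64_t)s * scale32;        /* 32x32 -> 64-bit multiply         */
        int32_t acc_hi = (int32_t)(acc >> 32);     /* keep the high 32 bits            */

        /* Undo both normalizations and the dropped low 32 bits, apply the real shift. */
        int32_t res = asr_rnd(acc_hi, scale_shift + sc_norm + s_norm - 32);
        return res + out_zp;
    }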
@@ -26,9 +57,6 @@ template <typename in_T, typename out_T, typename acc_T>
 mli_status compute_convert_quantized_data(const mli_tensor * src, mli_tensor * dst) {
     mli_prv_fx_init_dsp_ctrl();
 
-    /* If the accumulator is int64_t, so int32_t should be used for multiplying. */
-    typedef typename std::conditional<std::is_same<acc_T, int64_t>::value, int32_t, int16_t>::type mul_T;
-
     /* Get Generic Private Tensors */
     auto src_prv = mli_prv_get_generic_tensor<MLI_PTR(in_T)>(src);
     auto dst_prv = mli_prv_get_generic_tensor<MLI_OUT_PTR(out_T)>(dst);
@@ -63,10 +91,10 @@ mli_status compute_convert_quantized_data(const mli_tensor * src, mli_tensor * d
         /* Calculate scale and scaled zero point. */
         mli::krn::s8asym_quant_params params;
         mli::krn::define_requant_params(src, dst, &params, scale_idx);
-        const int16_t scale_shift = params.shift;
+        const int16_t scale_shift = mli_math_min_fx(params.shift, (int16_t)((sizeof(acc_T) * 8) - 1));
         const int16_t scale = params.scale;
-        int16_t in_zp = mli_hlp_tensor_zero_offset(src, scale_idx);
-        int16_t out_zp = mli_hlp_tensor_zero_offset(dst, scale_idx);
+        const int16_t in_zp = mli_hlp_tensor_zero_offset(src, scale_idx);
+        const int16_t out_zp = mli_hlp_tensor_zero_offset(dst, scale_idx);
         /* Calculate borders across all dimensions for slice where this scale is applicable */
         int dim_start[MLI_MAX_RANK] = { 0 };
         int dim_end[MLI_MAX_RANK] = { 0 };
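Note: the new clamp on scale_shift caps the requested right shift below the bit width of the accumulator, since shifting an N-bit integer by N or more bits is undefined in C and would discard the whole value anyway. Roughly equivalent, without the mli_math_min_fx helper:

    const int acc_bits = (int)(sizeof(acc_T) * 8);                       /* 32 or 64 */
    const int16_t max_shift = (int16_t)(acc_bits - 1);
    const int16_t scale_shift = (params.shift < max_shift) ? params.shift : max_shift;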
@@ -84,11 +112,8 @@ mli_status compute_convert_quantized_data(const mli_tensor * src, mli_tensor * d
                         const int dst_pos = POS(&dst_prv, dim0_idx, dim1_idx, dim2_idx, dim3_idx);
                         MLI_ASSERT(src_pos < src_tensor_size);
                         MLI_ASSERT(dst_pos < dst_tensor_size);
-                        mul_T src_in_zp = mli_math_sub_fx<mul_T>(src_tensor_arr[src_pos], in_zp);
-                        acc_T dst_acc = mli_math_mul_fx<mul_T, acc_T>(src_in_zp, scale);
-                        acc_T dst_acc_shf_casted = mli_math_asr_rnd_fx<acc_T>(dst_acc, scale_shift);
-                        acc_T dst_val = mli_math_add_fx<acc_T>(dst_acc_shf_casted, out_zp);
-                        dst_tensor_arr[dst_pos] = mli_math_cast_fx<acc_T, out_T>(dst_val, 0);
+                        calc_convert<in_T, out_T, acc_T>(&src_tensor_arr[src_pos], &dst_tensor_arr[dst_pos],
+                                                         in_zp, scale, scale_shift, out_zp);
                     }
                 }
             }
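As a concrete example of the per-element requantization that the inner loop now delegates to calc_convert (numbers chosen arbitrarily): with *src = 100, in_zp = 10, scale = 20486, scale_shift = 15 and out_zp = -5, the short-accumulator branch computes (100 - 10) * 20486 = 1843740, shifts right by 15 with rounding to get 56, and adds out_zp for a final value of 51.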
@@ -137,7 +162,7 @@ mli_status convert_float_data(const mli_tensor * src, mli_tensor * dst, convert_
 
     const mli_tensor* tensor = nullptr;
     const mli_tensor* float_tensor = nullptr;
-
+
     /* Defining float_tensor and tensor depending on current conversion direction */
     if (mode == mli::hlp::QUANTIZE) {
         float_tensor = src;
@@ -171,14 +196,16 @@ mli_status convert_float_data(const mli_tensor * src, mli_tensor * dst, convert_
     /* Transformation will be applied on slices across scales dimension (or all tensor) */
     for (int scale_idx = 0; scale_idx < scales_num; ++scale_idx) {
         /* Calculate current scale and zero offset */
-        float scale_val;
+        float scale_val = 1.0;
+        int8_t frac_bits = mli_hlp_tensor_scale_shift(tensor, scale_idx);
+        float scale = (float) mli_hlp_tensor_scale(tensor, scale_idx);
         if (mode == mli::hlp::QUANTIZE) {
-            scale_val = (float)((int64_t)1l << mli_hlp_tensor_scale_shift(tensor, scale_idx));
-            scale_val = scale_val / (float)mli_hlp_tensor_scale(tensor, scale_idx);
-        } else if (mode == mli::hlp::DEQUANTIZE) {
-            scale_val = (float)mli_hlp_tensor_scale(tensor, scale_idx);
-            scale_val = scale_val / (float)((int64_t)1l << mli_hlp_tensor_scale_shift(tensor, scale_idx));
+            scale = 1.0 / scale;
+            scale_val = ldexp(scale, ((int32_t) frac_bits));
+        } else {
+            scale_val = ldexp(scale, -((int32_t) frac_bits));
         }
+
         int16_t zero_offset = mli_hlp_tensor_zero_offset(tensor, scale_idx);
 
         /* Calculate borders across all dimensions for slice where this scale is applicable */
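Note: the <math.h> include added at the top of the file provides ldexp(), which computes x * 2^n. Since the tensor stores its scale as a fixed-point integer with frac_bits fractional bits, the real multiplier is scale / 2^frac_bits for dequantization and its reciprocal, 2^frac_bits / scale, for quantization, which is what the two ldexp calls above produce. A minimal sketch of the same computation with illustrative names (stored_scale stands in for the value returned by mli_hlp_tensor_scale):

    #include <math.h>

    /* DEQUANTIZE: multiplier = stored_scale * 2^(-frac_bits) */
    float dequant_multiplier(float stored_scale, int frac_bits) {
        return ldexpf(stored_scale, -frac_bits);
    }

    /* QUANTIZE: multiplier = 2^frac_bits / stored_scale */
    float quant_multiplier(float stored_scale, int frac_bits) {
        return ldexpf(1.0f / stored_scale, frac_bits);
    }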