1818//=================================================================================
1919mli_status mli_hlp_float_to_fx_tensor (const float * src , uint32_t src_size , mli_tensor * dst ) {
2020 mli_status ret = MLI_STATUS_OK ;
21- const uint32_t scale_val = 1u << (dst -> el_params .fx .frac_bits );
21+ float scale_val = (float ) ((int64_t )1u << mli_hlp_tensor_scale_shift (dst )) / (float ) mli_hlp_tensor_scale (dst , 0 );
22+ int16_t zero_offset = mli_hlp_tensor_zero_offset (dst , 0 );
2223
2324 if (dst -> el_type == MLI_EL_FX_16 ) {
2425 if (dst -> capacity < src_size * sizeof (int16_t ))
@@ -30,7 +31,7 @@ mli_status mli_hlp_float_to_fx_tensor (const float *src, uint32_t src_size, mli_
3031 int32_t dst_val = (int32_t ) (scale_val * src [idx ] + round_val );
3132 dst_arr [idx ] = (int16_t ) (MIN (MAX (dst_val , INT16_MIN ), INT16_MAX ));
3233 }
33- } else {
34+ } else if ( dst -> el_type == MLI_EL_FX_8 ) {
3435 if (dst -> capacity < src_size * sizeof (int8_t ))
3536 return MLI_STATUS_LENGTH_ERROR ;
3637
@@ -40,6 +41,74 @@ mli_status mli_hlp_float_to_fx_tensor (const float *src, uint32_t src_size, mli_
4041 const int32_t dst_val = (int32_t ) (scale_val * src [idx ] + round_val );
4142 dst_arr [idx ] = (int8_t ) (MIN (MAX (dst_val , INT8_MIN ), INT8_MAX ));
4243 }
44+ } else if (dst -> el_type == MLI_EL_ASYM_I8 ){
45+ if (dst -> capacity < src_size * sizeof (int8_t ))
46+ return MLI_STATUS_LENGTH_ERROR ;
47+
48+ int8_t * dst_arr = dst -> data ;
49+ if (dst -> el_params .asym .dim < 0 ) {
50+ for (int idx = 0 ; idx < src_size ; idx ++ ) {
51+ const float round_val = (src [idx ] > 0 ) ? 0.5f : -0.5f ;
52+ const int32_t dst_val = (int32_t ) (scale_val * src [idx ] + round_val );
53+ dst_arr [idx ] = (int8_t ) (MIN (MAX (dst_val + zero_offset , INT8_MIN ), INT8_MAX ));
54+ }
55+ } else {
56+ const int concat_dim = dst -> el_params .asym .dim ;
57+ const int total_elements = mli_hlp_count_elem_num (dst , 0 );
58+ const int elements_to_convert = (concat_dim + 1 == dst -> rank )? 1 : mli_hlp_count_elem_num (dst , concat_dim + 1 );
59+ const int step_after_conv = mli_hlp_count_elem_num (dst , concat_dim );
60+ const int conversions_num = dst -> shape [concat_dim ];
61+
62+ if (total_elements != src_size )
63+ return MLI_STATUS_LENGTH_ERROR ;
64+
65+ for (int c_idx = 0 ; c_idx < conversions_num ; c_idx ++ ) {
66+ scale_val = ((int64_t )1u << mli_hlp_tensor_scale_shift (dst )) / (float )mli_hlp_tensor_scale (dst , (uint32_t )c_idx );
67+ zero_offset = mli_hlp_tensor_zero_offset (dst , c_idx );
68+ for (int data_idx = c_idx * elements_to_convert ; data_idx < total_elements ; data_idx += step_after_conv ) {
69+ for (int el_idx = 0 ; el_idx < elements_to_convert ; ++ el_idx ) {
70+ const float round_val = (src [data_idx + el_idx ] > 0 ) ? 0.5f : -0.5f ;
71+ int32_t dst_val = (int32_t ) (scale_val * src [data_idx + el_idx ] + round_val );
72+ dst_arr [data_idx + el_idx ] = (int8_t )(MIN (MAX (dst_val + zero_offset , INT8_MIN ), INT8_MAX ));
73+ }
74+ }
75+ }
76+ }
77+ } else if (dst -> el_type == MLI_EL_ASYM_I32 ) {
78+ if (dst -> capacity < src_size * sizeof (int32_t ))
79+ return MLI_STATUS_LENGTH_ERROR ;
80+
81+ int32_t * dst_arr = dst -> data ;
82+ if (dst -> el_params .asym .dim < 0 ) {
83+ for (int idx = 0 ; idx < src_size ; idx ++ ) {
84+ const float round_val = (src [idx ] > 0 ) ? 0.5f : -0.5f ;
85+ int32_t dst_val = (int32_t ) (scale_val * src [idx ] + round_val );
86+ dst_arr [idx ] = dst_val + zero_offset ;
87+ }
88+ } else {
89+ const int concat_dim = dst -> el_params .asym .dim ;
90+ const int total_elements = mli_hlp_count_elem_num (dst , 0 );
91+ const int elements_to_convert = (concat_dim + 1 == dst -> rank )? 1 : mli_hlp_count_elem_num (dst , concat_dim + 1 );
92+ const int step_after_conv = mli_hlp_count_elem_num (dst , concat_dim );
93+ const int conversions_num = dst -> shape [concat_dim ];
94+
95+ if (total_elements != src_size )
96+ return MLI_STATUS_LENGTH_ERROR ;
97+
98+ for (int c_idx = 0 ; c_idx < conversions_num ; c_idx ++ ) {
99+ scale_val = ((int64_t )1u << mli_hlp_tensor_scale_shift (dst )) / (float )mli_hlp_tensor_scale (dst , (uint32_t )c_idx );
100+ zero_offset = mli_hlp_tensor_zero_offset (dst , c_idx );
101+ for (int data_idx = c_idx * elements_to_convert ; data_idx < total_elements ; data_idx += step_after_conv ) {
102+ for (int el_idx = 0 ; el_idx < elements_to_convert ; ++ el_idx ) {
103+ const float round_val = (src [data_idx + el_idx ] > 0 ) ? 0.5f : -0.5f ;
104+ int32_t dst_val = (int32_t ) (scale_val * src [data_idx + el_idx ] + round_val );
105+ dst_arr [data_idx + el_idx ] = dst_val + zero_offset ;
106+ }
107+ }
108+ }
109+ }
110+ } else {
111+ ret = MLI_STATUS_TYPE_MISMATCH ;
43112 }
44113 return ret ;
45114}
@@ -54,15 +123,62 @@ mli_status mli_hlp_fx_tensor_to_float (const mli_tensor * src, float *dst, uint3
54123 if (elem_num == 0 )
55124 return MLI_STATUS_BAD_TENSOR ;
56125
57- const float scale_val = 1.0f / (float ) (1u << (src -> el_params .fx .frac_bits ));
126+ float scale_val = (float )mli_hlp_tensor_scale (src , 0 ) / (float ) (1u << mli_hlp_tensor_scale_shift (src ));
127+ int16_t zero_offset = mli_hlp_tensor_zero_offset (src , 0 );
58128 if (src -> el_type == MLI_EL_FX_16 ) {
59129 int16_t * src_arr = src -> data ;
60130 for (int idx = 0 ; idx < elem_num ; idx ++ )
61131 dst [idx ] = (float ) (scale_val * src_arr [idx ]);
62- } else {
132+ } else if ( src -> el_type == MLI_EL_FX_8 ) {
63133 int8_t * src_arr = src -> data ;
64134 for (int idx = 0 ; idx < elem_num ; idx ++ )
65- dst [idx ] = (float ) (scale_val * src_arr [idx ]);
135+ dst [idx ] = (float ) (scale_val * (src_arr [idx ] - zero_offset ));
136+ } else if (src -> el_type == MLI_EL_ASYM_I8 ){
137+ int8_t * src_arr = src -> data ;
138+ if (src -> el_params .asym .dim < 0 ) {
139+ for (int idx = 0 ; idx < elem_num ; idx ++ )
140+ dst [idx ] = (float ) (scale_val * (src_arr [idx ] - zero_offset ));
141+ } else {
142+ const int concat_dim = src -> el_params .asym .dim ;
143+ const int total_elements = mli_hlp_count_elem_num (src , 0 );
144+ const int elements_to_convert = (concat_dim + 1 == src -> rank )? 1 : mli_hlp_count_elem_num (src , concat_dim + 1 );
145+ const int step_after_conv = mli_hlp_count_elem_num (src , concat_dim );
146+ const int conversions_num = src -> shape [concat_dim ];
147+
148+ for (int c_idx = 0 ; c_idx < conversions_num ; c_idx ++ ) {
149+ scale_val = (float )mli_hlp_tensor_scale (src , c_idx ) / (float ) (1u << mli_hlp_tensor_scale_shift (src ));
150+ zero_offset = mli_hlp_tensor_zero_offset (src , c_idx );
151+ for (int data_idx = c_idx * elements_to_convert ; data_idx < total_elements ; data_idx += step_after_conv ) {
152+ for (int el_idx = 0 ; el_idx < elements_to_convert ; ++ el_idx ) {
153+ dst [data_idx + el_idx ] = (float ) (scale_val * (src_arr [data_idx + el_idx ] - zero_offset ));
154+ }
155+ }
156+ }
157+ }
158+ } else if (src -> el_type == MLI_EL_ASYM_I32 ) {
159+ int32_t * src_arr = src -> data ;
160+ if (src -> el_params .asym .dim < 0 ) {
161+ for (int idx = 0 ; idx < elem_num ; idx ++ )
162+ dst [idx ] = (float ) (scale_val * (src_arr [idx ] - zero_offset ));
163+ } else {
164+ const int concat_dim = src -> el_params .asym .dim ;
165+ const int total_elements = mli_hlp_count_elem_num (src , 0 );
166+ const int elements_to_convert = (concat_dim + 1 == src -> rank )? 1 : mli_hlp_count_elem_num (src , concat_dim + 1 );
167+ const int step_after_conv = mli_hlp_count_elem_num (src , concat_dim );
168+ const int conversions_num = src -> shape [concat_dim ];
169+
170+ for (int c_idx = 0 ; c_idx < conversions_num ; c_idx ++ ) {
171+ scale_val = (float )mli_hlp_tensor_scale (src , c_idx ) / (float ) (1u << mli_hlp_tensor_scale_shift (src ));
172+ zero_offset = mli_hlp_tensor_zero_offset (src , c_idx );
173+ for (int data_idx = c_idx * elements_to_convert ; data_idx < total_elements ; data_idx += step_after_conv ) {
174+ for (int el_idx = 0 ; el_idx < elements_to_convert ; ++ el_idx ) {
175+ dst [data_idx + el_idx ] = (float ) (scale_val * (src_arr [data_idx + el_idx ] - zero_offset ));
176+ }
177+ }
178+ }
179+ }
180+ } else {
181+ return MLI_STATUS_TYPE_MISMATCH ;
66182 }
67183 return MLI_STATUS_OK ;
68184}