@@ -577,12 +577,10 @@ static inline __m128i mul_sum_i8_pairs(const __m128i x, const __m128i y) {
577577// horizontally add 8 floats
// Steps as written below: take the two 128-bit parts of x via lasx_extractf128
// (indices 1 and 0) and add them, add the result to a __lsx_vpickod_d shuffle
// of itself, then add element 1 (moved through a scalar into element 0 of a
// __lsx_vldi(0) vector). The float in element 0 is returned.
// In this hunk the ft_union/__lsx_vpickve2gr_w read of element 0 is removed
// and replaced by a direct subscript on the vector cast to v4f32.
// NOTE(review): exact lane ordering depends on the LSX intrinsic semantics and
// on lasx_extractf128, which is defined outside this view -- confirm there.
578578static inline float hsum_float_8 (const __m256 x ) {
579579 __m128 res = lasx_extractf128 (x , 1 );
580- ft_union tmp ;
581580 res = __lsx_vfadd_s (res , lasx_extractf128 (x , 0 ));
582581 res = __lsx_vfadd_s (res , (__m128 )__lsx_vpickod_d ((__m128i )res , (__m128i )res ));
583582 res = __lsx_vfadd_s (res , (__m128 )__lsx_vinsgr2vr_w (__lsx_vldi (0 ), __lsx_vpickve2gr_w (res , 1 ), 0 ));
584- tmp .i = __lsx_vpickve2gr_w (res , 0 );
585- return tmp .f ;
583+ return ((v4f32 )res )[0 ];
586584}
587585
588586// horizontally add 8 int32_t
@@ -924,7 +922,6 @@ void quantize_row_q8_0(const float * restrict x, void * restrict vy, int64_t k)
924922
925923#elif defined(__loongarch_asx )
926924 for (int i = 0 ; i < nb ; i ++ ) {
927- ft_union fi ;
928925 __m256 v0 = (__m256 )__lasx_xvld ( x , 0 );
929926 __m256 v1 = (__m256 )__lasx_xvld ( x , 32 );
930927 __m256 v2 = (__m256 )__lasx_xvld ( x , 64 );
@@ -942,8 +939,7 @@ void quantize_row_q8_0(const float * restrict x, void * restrict vy, int64_t k)
942939 max4 = __lsx_vfmax_s ( max4 , (__m128 )__lsx_vpickod_d ((__m128i ) max4 , (__m128i )max4 ) );
943940 __m128 tmp = max4 ;
944941 max4 = __lsx_vfmax_s ( max4 , (__m128 )__lsx_vinsgr2vr_w (tmp , __lsx_vpickve2gr_w ( max4 , 1 ), 0 ));
945- fi .i = __lsx_vpickve2gr_w ( (__m128i )max4 , 0 );
946- const float max_scalar = fi .f ;
942+ const float max_scalar = ((v4f32 )max4 )[0 ];
947943
948944 // Quantize these floats
949945 const float d = max_scalar / 127.f ;
@@ -1248,7 +1244,6 @@ void quantize_row_q8_1(const float * restrict x, void * restrict vy, int64_t k)
12481244
12491245#elif defined(__loongarch_asx )
12501246 for (int i = 0 ; i < nb ; i ++ ) {
1251- ft_union ft ;
12521247 __m256 v0 = (__m256 )__lasx_xvld ( x , 0 );
12531248 __m256 v1 = (__m256 )__lasx_xvld ( x , 32 );
12541249 __m256 v2 = (__m256 )__lasx_xvld ( x , 64 );
@@ -1266,8 +1261,7 @@ void quantize_row_q8_1(const float * restrict x, void * restrict vy, int64_t k)
12661261 max4 = __lsx_vfmax_s ( max4 , (__m128 )__lsx_vpickod_d ((__m128i ) max4 , (__m128i )max4 ) );
12671262 __m128 tmp = max4 ;
12681263 max4 = __lsx_vfmax_s ( max4 , (__m128 )__lsx_vextrins_w ((__m128i )tmp , (__m128i )max4 , 0x10 ));
1269- ft .i = __lsx_vpickve2gr_w ( (__m128i )max4 , 0 );
1270- const float max_scalar = ft .f ;
1264+ const float max_scalar = ((v4f32 )max4 )[0 ];
12711265
12721266 // Quantize these floats
12731267 const float d = max_scalar / 127.f ;
@@ -6139,9 +6133,7 @@ void ggml_vec_dot_q4_K_q8_K(int n, float * restrict s, size_t bs, const void * r
61396133 acc_m = __lsx_vfadd_s (acc_m , (__m128 )tmp1 );
61406134
61416135
6142- ft_union fi ;
6143- fi .i = __lsx_vpickve2gr_w (acc_m , 0 );
6144- * s = hsum_float_8 (acc ) + fi .f ;
6136+ * s = hsum_float_8 (acc ) + ((v4f32 )acc_m )[0 ];
61456137#else
61466138
61476139 const uint8_t * scales = (const uint8_t * )& utmp [0 ];
0 commit comments