Skip to content

Commit 99bbe26

Browse files
committed
ggml : Fix warnings when running the CPU CI locally on LoongArch
1 parent 45aa1db commit 99bbe26

File tree

2 files changed

+10
-24
lines changed

2 files changed

+10
-24
lines changed

ggml/src/ggml-cpu/ggml-cpu-impl.h

Lines changed: 6 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -360,21 +360,15 @@ inline static int32x4_t ggml_vdotq_s32(int32x4_t acc, int8x16_t a, int8x16_t b)
360360
#endif
361361

362362
#if defined(__loongarch_asx)
363-
364-
typedef union {
365-
int32_t i;
366-
float f;
367-
} ft_union;
368-
369363
/* float type data load instructions */
370-
static __m128 __lsx_vreplfr2vr_s(float val) {
371-
ft_union fi_tmpval = {.f = val};
372-
return (__m128)__lsx_vreplgr2vr_w(fi_tmpval.i);
364+
static __m128 __lsx_vreplfr2vr_s(const float val) {
365+
v4f32 res = {val, val, val, val};
366+
return (__m128)res;
373367
}
374368

375-
static __m256 __lasx_xvreplfr2vr_s(float val) {
376-
ft_union fi_tmpval = {.f = val};
377-
return (__m256)__lasx_xvreplgr2vr_w(fi_tmpval.i);
369+
static __m256 __lasx_xvreplfr2vr_s(const float val) {
370+
v8f32 res = {val, val, val, val, val, val, val, val};
371+
return (__m256)res;
378372
}
379373
#endif
380374

ggml/src/ggml-cpu/ggml-cpu-quants.c

Lines changed: 4 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -577,12 +577,10 @@ static inline __m128i mul_sum_i8_pairs(const __m128i x, const __m128i y) {
577577
// horizontally add 8 floats
578578
static inline float hsum_float_8(const __m256 x) {
579579
__m128 res = lasx_extractf128(x, 1);
580-
ft_union tmp;
581580
res = __lsx_vfadd_s(res, lasx_extractf128(x, 0));
582581
res = __lsx_vfadd_s(res, (__m128)__lsx_vpickod_d((__m128i)res, (__m128i)res));
583582
res = __lsx_vfadd_s(res, (__m128)__lsx_vinsgr2vr_w(__lsx_vldi(0), __lsx_vpickve2gr_w(res, 1), 0));
584-
tmp.i = __lsx_vpickve2gr_w(res, 0);
585-
return tmp.f;
583+
return ((v4f32)res)[0];
586584
}
587585

588586
// horizontally add 8 int32_t
@@ -924,7 +922,6 @@ void quantize_row_q8_0(const float * restrict x, void * restrict vy, int64_t k)
924922

925923
#elif defined(__loongarch_asx)
926924
for (int i = 0; i < nb; i++) {
927-
ft_union fi;
928925
__m256 v0 = (__m256)__lasx_xvld( x , 0);
929926
__m256 v1 = (__m256)__lasx_xvld( x , 32);
930927
__m256 v2 = (__m256)__lasx_xvld( x , 64);
@@ -942,8 +939,7 @@ void quantize_row_q8_0(const float * restrict x, void * restrict vy, int64_t k)
942939
max4 = __lsx_vfmax_s( max4, (__m128)__lsx_vpickod_d((__m128i) max4, (__m128i)max4 ) );
943940
__m128 tmp = max4;
944941
max4 = __lsx_vfmax_s( max4, (__m128)__lsx_vinsgr2vr_w(tmp, __lsx_vpickve2gr_w( max4, 1 ), 0 ));
945-
fi.i = __lsx_vpickve2gr_w( (__m128i)max4, 0 );
946-
const float max_scalar = fi.f;
942+
const float max_scalar = ((v4f32)max4)[0];
947943

948944
// Quantize these floats
949945
const float d = max_scalar / 127.f;
@@ -1248,7 +1244,6 @@ void quantize_row_q8_1(const float * restrict x, void * restrict vy, int64_t k)
12481244

12491245
#elif defined(__loongarch_asx)
12501246
for (int i = 0; i < nb; i++) {
1251-
ft_union ft;
12521247
__m256 v0 = (__m256)__lasx_xvld( x , 0 );
12531248
__m256 v1 = (__m256)__lasx_xvld( x , 32 );
12541249
__m256 v2 = (__m256)__lasx_xvld( x , 64 );
@@ -1266,8 +1261,7 @@ void quantize_row_q8_1(const float * restrict x, void * restrict vy, int64_t k)
12661261
max4 = __lsx_vfmax_s( max4, (__m128)__lsx_vpickod_d((__m128i) max4, (__m128i)max4 ) );
12671262
__m128 tmp = max4;
12681263
max4 = __lsx_vfmax_s( max4, (__m128)__lsx_vextrins_w((__m128i)tmp, (__m128i)max4, 0x10 ));
1269-
ft.i = __lsx_vpickve2gr_w( (__m128i)max4, 0 );
1270-
const float max_scalar = ft.f;
1264+
const float max_scalar = ((v4f32)max4)[0];
12711265

12721266
// Quantize these floats
12731267
const float d = max_scalar / 127.f;
@@ -6139,9 +6133,7 @@ void ggml_vec_dot_q4_K_q8_K(int n, float * restrict s, size_t bs, const void * r
61396133
acc_m = __lsx_vfadd_s(acc_m, (__m128)tmp1);
61406134

61416135

6142-
ft_union fi;
6143-
fi.i = __lsx_vpickve2gr_w(acc_m, 0);
6144-
*s = hsum_float_8(acc) + fi.f ;
6136+
*s = hsum_float_8(acc) + ((v4f32)acc_m)[0];
61456137
#else
61466138

61476139
const uint8_t * scales = (const uint8_t*)&utmp[0];

0 commit comments

Comments
 (0)