Skip to content

Commit 45aa1db

Browse files
committed
ggml : optimize loongarch_asx extend i16,i8,u8 to i32,i16
1 parent e6d955e commit 45aa1db

File tree

1 file changed: 3 additions and 18 deletions.

ggml/src/ggml-cpu/ggml-cpu-quants.c

Lines changed: 3 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -501,30 +501,15 @@ static __m256i lasx_shuffle_b(__m256i a, __m256i b) {
 }
 
 static __m256i lasx_extu8_16(__m128i a) {
-    __m128i zero = __lsx_vldi(0);
-    __m128i vlo = __lsx_vilvl_b(zero, a);
-    __m128i vhi = __lsx_vilvh_b(zero, a);
-    return lasx_set_q(vhi, vlo);
+    return __lasx_vext2xv_hu_bu(____m256i(a));
 }
 
 static __m256i lasx_ext8_16(__m128i a) {
-    __m128i sign = __lsx_vslti_b(a, 0);
-    __m128i vlo = __lsx_vilvl_b(sign, a);
-    __m128i vhi = __lsx_vilvh_b(sign, a);
-    return lasx_set_q(vhi, vlo);
+    return __lasx_vext2xv_h_b(____m256i(a));
 }
 
 static __m256i lasx_ext16_32(__m128i a) {
-    __m256i tmp1;
-    tmp1 = __lasx_xvinsgr2vr_w(tmp1, __lsx_vpickve2gr_h(a, 0), 0);
-    tmp1 = __lasx_xvinsgr2vr_w(tmp1, __lsx_vpickve2gr_h(a, 1), 1);
-    tmp1 = __lasx_xvinsgr2vr_w(tmp1, __lsx_vpickve2gr_h(a, 2), 2);
-    tmp1 = __lasx_xvinsgr2vr_w(tmp1, __lsx_vpickve2gr_h(a, 3), 3);
-    tmp1 = __lasx_xvinsgr2vr_w(tmp1, __lsx_vpickve2gr_h(a, 4), 4);
-    tmp1 = __lasx_xvinsgr2vr_w(tmp1, __lsx_vpickve2gr_h(a, 5), 5);
-    tmp1 = __lasx_xvinsgr2vr_w(tmp1, __lsx_vpickve2gr_h(a, 6), 6);
-    tmp1 = __lasx_xvinsgr2vr_w(tmp1, __lsx_vpickve2gr_h(a, 7), 7);
-    return tmp1;
+    return __lasx_vext2xv_w_h(____m256i(a));
 }
 
 static __m128i lasx_extracti128( __m256i a, int pos) {

0 commit comments

Comments
 (0)