@@ -883,7 +883,7 @@ void quantize_row_q8_0(const float * GGML_RESTRICT x, void * GGML_RESTRICT vy, i
883883 _mm_storeu_si128((__m128i *)(y[i].qs + 16), ni4);
884884#endif
885885 }
886- #elif defined(__riscv_v_intrinsic) && !defined(__riscv_xtheadvector )
886+ #elif defined(__riscv_v )
887887
888888 size_t vl = QK8_0;
889889
@@ -1221,7 +1221,7 @@ void quantize_row_q8_1(const float * GGML_RESTRICT x, void * GGML_RESTRICT vy, i
12211221 _mm_storeu_si128((__m128i *)(y[i].qs + 16), ni4);
12221222#endif
12231223 }
1224- #elif defined(__riscv_v_intrinsic) && !defined(__riscv_xtheadvector )
1224+ #elif defined(__riscv_v )
12251225
12261226 size_t vl = QK8_1;
12271227
@@ -2384,7 +2384,7 @@ void ggml_vec_dot_q4_0_q8_0(int n, float * GGML_RESTRICT s, size_t bs, const voi
23842384 }
23852385
23862386 sumf = hsum_float_4x4(acc_0, acc_1, acc_2, acc_3);
2387- #elif defined(__riscv_v_intrinsic) && !defined(__riscv_xtheadvector )
2387+ #elif defined(__riscv_v )
23882388 size_t vl = qk / 2;
23892389
23902390 for (; ib < nb; ++ib) {
@@ -2774,7 +2774,7 @@ void ggml_vec_dot_q4_1_q8_1(int n, float * GGML_RESTRICT s, size_t bs, const voi
27742774 }
27752775
27762776 sumf = hsum_float_8(acc) + summs;
2777- #elif defined(__riscv_v_intrinsic) && !defined(__riscv_xtheadvector )
2777+ #elif defined(__riscv_v )
27782778 size_t vl = qk / 2;
27792779
27802780 for (; ib < nb; ++ib) {
@@ -3121,7 +3121,7 @@ void ggml_vec_dot_q5_0_q8_0(int n, float * GGML_RESTRICT s, size_t bs, const voi
31213121 }
31223122
31233123 sumf = hsum_float_8(acc);
3124- #elif defined(__riscv_v_intrinsic) && !defined(__riscv_xtheadvector )
3124+ #elif defined(__riscv_v )
31253125 size_t vl;
31263126 size_t vlenb = __riscv_vlenb();
31273127
@@ -3460,7 +3460,7 @@ void ggml_vec_dot_q5_1_q8_1(int n, float * GGML_RESTRICT s, size_t bs, const voi
34603460 }
34613461
34623462 sumf = hsum_float_8(acc) + summs;
3463- #elif defined(__riscv_v_intrinsic) && !defined(__riscv_xtheadvector )
3463+ #elif defined(__riscv_v )
34643464 size_t vl;
34653465 size_t vlenb = __riscv_vlenb();
34663466
@@ -3897,7 +3897,7 @@ void ggml_vec_dot_q8_0_q8_0(int n, float * GGML_RESTRICT s, size_t bs, const voi
38973897 }
38983898
38993899 sumf = hsum_float_8(accum);
3900- #elif defined(__riscv_v_intrinsic) && !defined(__riscv_xtheadvector )
3900+ #elif defined(__riscv_v )
39013901 size_t vl = qk;
39023902
39033903 for (; ib < nb; ++ib) {
@@ -5100,13 +5100,11 @@ void ggml_vec_dot_q2_K_q8_K(int n, float * GGML_RESTRICT s, size_t bs, const voi
51005100
51015101 *s = sumf;
51025102
5103- #elif defined __riscv_v_intrinsic
5103+ #elif defined __riscv_xtheadvector
51045104
51055105 float sumf = 0;
5106-
51075106 uint8_t atmp[16];
51085107
5109- #if defined(__riscv_xtheadvector)
51105108 for (int i = 0; i < nb; ++i) {
51115109 const uint8_t * q2 = x[i].qs;
51125110 const int8_t * q8 = y[i].qs;
@@ -5196,7 +5194,14 @@ void ggml_vec_dot_q2_K_q8_K(int n, float * GGML_RESTRICT s, size_t bs, const voi
51965194
51975195 sumf += dall * isum;
51985196 }
5199- #else
5197+
5198+ *s = sumf;
5199+
5200+ #elif defined __riscv_v
5201+
5202+ float sumf = 0;
5203+ uint8_t atmp[16];
5204+
52005205 const int vector_length = __riscv_vlenb() * 8;
52015206 uint8_t temp_01[32] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
52025207 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 };
@@ -5371,7 +5376,6 @@ void ggml_vec_dot_q2_K_q8_K(int n, float * GGML_RESTRICT s, size_t bs, const voi
53715376 assert(false && "Unsupported vector length");
53725377 break;
53735378 }
5374- #endif // defined(__riscv_xtheadvector)
53755379
53765380 *s = sumf;
53775381
@@ -6230,13 +6234,11 @@ void ggml_vec_dot_q3_K_q8_K(int n, float * GGML_RESTRICT s, size_t bs, const voi
62306234
62316235 *s = sumf;
62326236
6233- #elif defined __riscv_v_intrinsic
6237+ #elif defined __riscv_xtheadvector
62346238
62356239 uint32_t utmp[4];
6236-
62376240 float sumf = 0;
62386241
6239- #if defined(__riscv_xtheadvector)
62406242 for (int i = 0; i < nb; ++i) {
62416243 const uint8_t * restrict q3 = x[i].qs;
62426244 const uint8_t * restrict qh = x[i].hmask;
@@ -6357,7 +6359,13 @@ void ggml_vec_dot_q3_K_q8_K(int n, float * GGML_RESTRICT s, size_t bs, const voi
63576359 const float d = GGML_FP16_TO_FP32(x[i].d) * y[i].d;
63586360 sumf += d * isum;
63596361 }
6360- #else
6362+
6363+ *s = sumf;
6364+
6365+ #elif defined __riscv_v
6366+
6367+ uint32_t utmp[4];
6368+ float sumf = 0;
63616369 uint32_t aux[3];
63626370 const int vector_length = __riscv_vlenb() * 8;
63636371
@@ -6574,7 +6582,6 @@ void ggml_vec_dot_q3_K_q8_K(int n, float * GGML_RESTRICT s, size_t bs, const voi
65746582 assert(false && "Unsupported vector length");
65756583 break;
65766584 }
6577- #endif // defined(__riscv_xtheadvector)
65786585
65796586 *s = sumf;
65806587
@@ -7397,14 +7404,13 @@ void ggml_vec_dot_q4_K_q8_K(int n, float * GGML_RESTRICT s, size_t bs, const voi
73977404
73987405 *s = hsum_float_8(acc) + _mm_cvtss_f32(acc_m);
73997406
7400- #elif defined __riscv_v_intrinsic
7407+ #elif defined __riscv_xtheadvector
74017408
74027409 const uint8_t * scales = (const uint8_t*)&utmp[0];
74037410 const uint8_t * mins = (const uint8_t*)&utmp[2];
74047411
74057412 float sumf = 0;
74067413
7407- #if defined(__riscv_xtheadvector)
74087414 for (int i = 0; i < nb; ++i) {
74097415 const float d = y[i].d * GGML_FP16_TO_FP32(x[i].d);
74107416 const float dmin = y[i].d * GGML_FP16_TO_FP32(x[i].dmin);
@@ -7511,7 +7517,15 @@ void ggml_vec_dot_q4_K_q8_K(int n, float * GGML_RESTRICT s, size_t bs, const voi
75117517 sumf += d * sumi;
75127518
75137519 }
7514- #else
7520+
7521+ *s = sumf;
7522+
7523+ #elif defined __riscv_v
7524+
7525+ const uint8_t * scales = (const uint8_t*)&utmp[0];
7526+ const uint8_t * mins = (const uint8_t*)&utmp[2];
7527+
7528+ float sumf = 0;
75157529 const int vector_length = __riscv_vlenb() * 8;
75167530
75177531 switch (vector_length) {
@@ -7687,7 +7701,6 @@ void ggml_vec_dot_q4_K_q8_K(int n, float * GGML_RESTRICT s, size_t bs, const voi
76877701 assert(false && "Unsupported vector length");
76887702 break;
76897703 }
7690- #endif
76917704
76927705 *s = sumf;
76937706
@@ -8401,7 +8414,7 @@ void ggml_vec_dot_q5_K_q8_K(int n, float * GGML_RESTRICT s, size_t bs, const voi
84018414
84028415 *s = sumf;
84038416
8404- #elif defined(__riscv_v_intrinsic) && !defined(__riscv_xtheadvector)
8417+ #elif defined __riscv_v
84058418
84068419 const uint8_t * scales = (const uint8_t*)&utmp[0];
84078420 const uint8_t * mins = (const uint8_t*)&utmp[2];
@@ -9559,11 +9572,10 @@ void ggml_vec_dot_q6_K_q8_K(int n, float * GGML_RESTRICT s, size_t bs, const voi
95599572 }
95609573 *s = sumf;
95619574
9562- #elif defined __riscv_v_intrinsic
9575+ #elif defined __riscv_xtheadvector
95639576
95649577 float sumf = 0;
95659578
9566- #if defined(__riscv_xtheadvector)
95679579 for (int i = 0; i < nb; ++i) {
95689580
95699581 const float d = GGML_FP16_TO_FP32(x[i].d) * y[i].d;
@@ -9638,7 +9650,12 @@ void ggml_vec_dot_q6_K_q8_K(int n, float * GGML_RESTRICT s, size_t bs, const voi
96389650 sumf += d * sum_t;
96399651
96409652 }
9641- #else
9653+
9654+ *s = sumf;
9655+
9656+ #elif defined __riscv_v
9657+
9658+ float sumf = 0;
96429659 const int vector_length = __riscv_vlenb() * 8;
96439660
96449661 switch (vector_length) {
@@ -9801,7 +9818,6 @@ void ggml_vec_dot_q6_K_q8_K(int n, float * GGML_RESTRICT s, size_t bs, const voi
98019818 assert(false && "Unsupported vector length");
98029819 break;
98039820 }
9804- #endif // defined(__riscv_xtheadvector)
98059821
98069822 *s = sumf;
98079823
0 commit comments