33#include "ggml-quants.h"
44#include "ggml-impl.h"
55#include "ggml-cpu.h"
6- #include "simd-mappings.h"
76
87#include "../../quants.h"
98#include "../../ggml-cpu-impl.h"
@@ -46,7 +45,7 @@ void quantize_row_q8_0(const float * GGML_RESTRICT x, void * GGML_RESTRICT vy, i
4645 const float d = amax / ((1 << 7 ) - 1 );
4746 const float id = d ? 1.0f /d : 0.0f ;
4847
49- y [i ].d = GGML_CPU_FP32_TO_FP16 (d );
48+ y [i ].d = GGML_FP32_TO_FP16 (d );
5049
5150 vfloat32m8_t x0 = __riscv_vfmul_vf_f32m8 (v_x , id , vl );
5251
@@ -86,7 +85,7 @@ void quantize_row_q8_1(const float * GGML_RESTRICT x, void * GGML_RESTRICT vy, i
8685 const float d = amax / ((1 << 7 ) - 1 );
8786 const float id = d ? 1.0f /d : 0.0f ;
8887
89- y [i ].d = GGML_CPU_FP32_TO_FP16 (d );
88+ y [i ].d = GGML_FP32_TO_FP16 (d );
9089
9190 vfloat32m8_t x0 = __riscv_vfmul_vf_f32m8 (v_x , id , vl );
9291
@@ -103,7 +102,7 @@ void quantize_row_q8_1(const float * GGML_RESTRICT x, void * GGML_RESTRICT vy, i
103102
104103 // set y[i].s
105104 int sum = __riscv_vmv_x_s_i16m1_i16 (vwrs );
106- y [i ].s = GGML_CPU_FP32_TO_FP16 (sum * d );
105+ y [i ].s = GGML_FP32_TO_FP16 (sum * d );
107106 }
108107
109108#else
@@ -161,7 +160,7 @@ void ggml_vec_dot_q4_0_q8_0(int n, float * GGML_RESTRICT s, size_t bs, const voi
161160
162161 int sumi = __riscv_vmv_x_s_i32m1_i32 (vs2 );
163162
164- sumf += sumi * GGML_CPU_FP16_TO_FP32 (x [ib ].d )* GGML_CPU_FP16_TO_FP32 (y [ib ].d );
163+ sumf += sumi * GGML_FP16_TO_FP32 (x [ib ].d )* GGML_FP16_TO_FP32 (y [ib ].d );
165164 }
166165
167166#endif
@@ -178,7 +177,7 @@ void ggml_vec_dot_q4_0_q8_0(int n, float * GGML_RESTRICT s, size_t bs, const voi
178177 }
179178
180179 int sumi = sumi0 + sumi1 ;
181- sumf += sumi * GGML_CPU_FP16_TO_FP32 (x [ib ].d )* GGML_CPU_FP16_TO_FP32 (y [ib ].d );
180+ sumf += sumi * GGML_FP16_TO_FP32 (x [ib ].d )* GGML_FP16_TO_FP32 (y [ib ].d );
182181 }
183182
184183 * s = sumf ;
@@ -226,7 +225,7 @@ void ggml_vec_dot_q4_1_q8_1(int n, float * GGML_RESTRICT s, size_t bs, const voi
226225
227226 int sumi = __riscv_vmv_x_s_i32m1_i32 (vs2 );
228227
229- sumf += (GGML_CPU_FP16_TO_FP32 (x [ib ].d )* GGML_CPU_FP16_TO_FP32 (y [ib ].d ))* sumi + GGML_CPU_FP16_TO_FP32 (x [ib ].m )* GGML_CPU_FP16_TO_FP32 (y [ib ].s );
228+ sumf += (GGML_FP16_TO_FP32 (x [ib ].d )* GGML_FP16_TO_FP32 (y [ib ].d ))* sumi + GGML_FP16_TO_FP32 (x [ib ].m )* GGML_FP16_TO_FP32 (y [ib ].s );
230229 }
231230
232231#endif
@@ -243,7 +242,7 @@ void ggml_vec_dot_q4_1_q8_1(int n, float * GGML_RESTRICT s, size_t bs, const voi
243242 }
244243
245244 int sumi = sumi0 + sumi1 ;
246- sumf += (GGML_CPU_FP16_TO_FP32 (x [ib ].d )* GGML_CPU_FP16_TO_FP32 (y [ib ].d ))* sumi + GGML_CPU_FP16_TO_FP32 (x [ib ].m )* GGML_CPU_FP16_TO_FP32 (y [ib ].s );
245+ sumf += (GGML_FP16_TO_FP32 (x [ib ].d )* GGML_FP16_TO_FP32 (y [ib ].d ))* sumi + GGML_FP16_TO_FP32 (x [ib ].m )* GGML_FP16_TO_FP32 (y [ib ].s );
247246 }
248247
249248 * s = sumf ;
@@ -294,7 +293,7 @@ void ggml_vec_dot_q5_0_q8_0(int n, float * GGML_RESTRICT s, size_t bs, const voi
294293 vint32m1_t sum = __riscv_vwredsum_vs_i16m4_i32m1 (mul , zero , vl );
295294 int32_t sumi = __riscv_vmv_x_s_i32m1_i32 (sum );
296295
297- sumf += (GGML_CPU_FP16_TO_FP32 (x [ib ].d ) * GGML_CPU_FP16_TO_FP32 (y [ib ].d )) * sumi ;
296+ sumf += (GGML_FP16_TO_FP32 (x [ib ].d ) * GGML_FP16_TO_FP32 (y [ib ].d )) * sumi ;
298297 }
299298
300299#endif
@@ -317,7 +316,7 @@ void ggml_vec_dot_q5_0_q8_0(int n, float * GGML_RESTRICT s, size_t bs, const voi
317316 }
318317
319318 int sumi = sumi0 + sumi1 ;
320- sumf += (GGML_CPU_FP16_TO_FP32 (x [ib ].d )* GGML_CPU_FP16_TO_FP32 (y [ib ].d )) * sumi ;
319+ sumf += (GGML_FP16_TO_FP32 (x [ib ].d )* GGML_FP16_TO_FP32 (y [ib ].d )) * sumi ;
321320 }
322321
323322 * s = sumf ;
@@ -367,7 +366,7 @@ void ggml_vec_dot_q5_1_q8_1(int n, float * GGML_RESTRICT s, size_t bs, const voi
367366 vint32m1_t sum = __riscv_vwredsum_vs_i16m4_i32m1 (mul , zero , vl );
368367 int32_t sumi = __riscv_vmv_x_s_i32m1_i32 (sum );
369368
370- sumf += (GGML_CPU_FP16_TO_FP32 (x [ib ].d )* GGML_CPU_FP16_TO_FP32 (y [ib ].d ))* sumi + GGML_CPU_FP16_TO_FP32 (x [ib ].m )* GGML_CPU_FP16_TO_FP32 (y [ib ].s );
369+ sumf += (GGML_FP16_TO_FP32 (x [ib ].d )* GGML_FP16_TO_FP32 (y [ib ].d ))* sumi + GGML_FP16_TO_FP32 (x [ib ].m )* GGML_FP16_TO_FP32 (y [ib ].s );
371370 }
372371
373372#endif
@@ -390,7 +389,7 @@ void ggml_vec_dot_q5_1_q8_1(int n, float * GGML_RESTRICT s, size_t bs, const voi
390389 }
391390
392391 int sumi = sumi0 + sumi1 ;
393- sumf += (GGML_CPU_FP16_TO_FP32 (x [ib ].d )* GGML_CPU_FP16_TO_FP32 (y [ib ].d ))* sumi + GGML_CPU_FP16_TO_FP32 (x [ib ].m )* GGML_CPU_FP16_TO_FP32 (y [ib ].s );
392+ sumf += (GGML_FP16_TO_FP32 (x [ib ].d )* GGML_FP16_TO_FP32 (y [ib ].d ))* sumi + GGML_FP16_TO_FP32 (x [ib ].m )* GGML_FP16_TO_FP32 (y [ib ].s );
394393 }
395394
396395 * s = sumf ;
@@ -428,7 +427,7 @@ void ggml_vec_dot_q8_0_q8_0(int n, float * GGML_RESTRICT s, size_t bs, const voi
428427
429428 int sumi = __riscv_vmv_x_s_i32m1_i32 (v_sum );
430429
431- sumf += sumi * (GGML_CPU_FP16_TO_FP32 (x [ib ].d )* GGML_CPU_FP16_TO_FP32 (y [ib ].d ));
430+ sumf += sumi * (GGML_FP16_TO_FP32 (x [ib ].d )* GGML_FP16_TO_FP32 (y [ib ].d ));
432431 }
433432
434433#endif
@@ -439,7 +438,7 @@ void ggml_vec_dot_q8_0_q8_0(int n, float * GGML_RESTRICT s, size_t bs, const voi
439438 sumi += x [ib ].qs [j ]* y [ib ].qs [j ];
440439 }
441440
442- sumf += sumi * (GGML_CPU_FP16_TO_FP32 (x [ib ].d )* GGML_CPU_FP16_TO_FP32 (y [ib ].d ));
441+ sumf += sumi * (GGML_FP16_TO_FP32 (x [ib ].d )* GGML_FP16_TO_FP32 (y [ib ].d ));
443442 }
444443
445444 * s = sumf ;
@@ -466,8 +465,8 @@ void ggml_vec_dot_q2_K_q8_K(int n, float * GGML_RESTRICT s, size_t bs, const voi
466465 const uint8_t * q2 = x [i ].qs ;
467466 const int8_t * q8 = y [i ].qs ;
468467 const uint8_t * sc = x [i ].scales ;
469- const float dall = y [i ].d * GGML_CPU_FP16_TO_FP32 (x [i ].d );
470- const float dmin = - y [i ].d * GGML_CPU_FP16_TO_FP32 (x [i ].dmin );
468+ const float dall = y [i ].d * GGML_FP16_TO_FP32 (x [i ].d );
469+ const float dmin = - y [i ].d * GGML_FP16_TO_FP32 (x [i ].dmin );
471470 uint8_t * patmp = atmp ;
472471 int vsums ;
473472 int tmp ;
@@ -570,8 +569,8 @@ void ggml_vec_dot_q2_K_q8_K(int n, float * GGML_RESTRICT s, size_t bs, const voi
570569 const int8_t * q8 = y [i ].qs ;
571570 const uint8_t * sc = x [i ].scales ;
572571
573- const float dall = y [i ].d * GGML_CPU_FP16_TO_FP32 (x [i ].d );
574- const float dmin = - y [i ].d * GGML_CPU_FP16_TO_FP32 (x [i ].dmin );
572+ const float dall = y [i ].d * GGML_FP16_TO_FP32 (x [i ].d );
573+ const float dmin = - y [i ].d * GGML_FP16_TO_FP32 (x [i ].dmin );
575574
576575 size_t vl = 16 ;
577576
@@ -645,8 +644,8 @@ void ggml_vec_dot_q2_K_q8_K(int n, float * GGML_RESTRICT s, size_t bs, const voi
645644 const uint8_t * q2 = x [i ].qs ;
646645 const int8_t * q8 = y [i ].qs ;
647646 const uint8_t * sc = x [i ].scales ;
648- const float dall = y [i ].d * GGML_CPU_FP16_TO_FP32 (x [i ].d );
649- const float dmin = - y [i ].d * GGML_CPU_FP16_TO_FP32 (x [i ].dmin );
647+ const float dall = y [i ].d * GGML_FP16_TO_FP32 (x [i ].d );
648+ const float dmin = - y [i ].d * GGML_FP16_TO_FP32 (x [i ].dmin );
650649 uint8_t * patmp = atmp ;
651650 int vsums ;
652651 int tmp ;
@@ -751,8 +750,8 @@ void ggml_vec_dot_q2_K_q8_K(int n, float * GGML_RESTRICT s, size_t bs, const voi
751750 summs += y [i ].bsums [j ] * (sc [j ] >> 4 );
752751 }
753752
754- const float dall = y [i ].d * GGML_CPU_FP16_TO_FP32 (x [i ].d );
755- const float dmin = y [i ].d * GGML_CPU_FP16_TO_FP32 (x [i ].dmin );
753+ const float dall = y [i ].d * GGML_FP16_TO_FP32 (x [i ].d );
754+ const float dmin = y [i ].d * GGML_FP16_TO_FP32 (x [i ].dmin );
756755
757756 int isum = 0 ;
758757 int is = 0 ;
@@ -917,7 +916,7 @@ void ggml_vec_dot_q3_K_q8_K(int n, float * GGML_RESTRICT s, size_t bs, const voi
917916 q3 += 32 ; q8 += 128 ; scale += 8 ;
918917 }
919918
920- const float d = GGML_CPU_FP16_TO_FP32 (x [i ].d ) * y [i ].d ;
919+ const float d = GGML_FP16_TO_FP32 (x [i ].d ) * y [i ].d ;
921920 sumf += d * isum ;
922921 }
923922
@@ -1018,7 +1017,7 @@ void ggml_vec_dot_q3_K_q8_K(int n, float * GGML_RESTRICT s, size_t bs, const voi
10181017
10191018 }
10201019
1021- const float d = GGML_CPU_FP16_TO_FP32 (x [i ].d ) * y [i ].d ;
1020+ const float d = GGML_FP16_TO_FP32 (x [i ].d ) * y [i ].d ;
10221021
10231022 sumf += d * sum_t ;
10241023
@@ -1135,7 +1134,7 @@ void ggml_vec_dot_q3_K_q8_K(int n, float * GGML_RESTRICT s, size_t bs, const voi
11351134 q3 += 32 ; q8 += 128 ; scale += 8 ;
11361135 }
11371136
1138- const float d = GGML_CPU_FP16_TO_FP32 (x [i ].d ) * y [i ].d ;
1137+ const float d = GGML_FP16_TO_FP32 (x [i ].d ) * y [i ].d ;
11391138 sumf += d * isum ;
11401139 }
11411140 break ;
@@ -1203,7 +1202,7 @@ void ggml_vec_dot_q3_K_q8_K(int n, float * GGML_RESTRICT s, size_t bs, const voi
12031202 for (int l = 0 ; l < 8 ; ++ l ) aux32 [l ] += (scales [j ] - 32 ) * aux16 [l ];
12041203 q8 += 8 ; a += 8 ;
12051204 }
1206- const float d = GGML_CPU_FP16_TO_FP32 (x [i ].d ) * y [i ].d ;
1205+ const float d = GGML_FP16_TO_FP32 (x [i ].d ) * y [i ].d ;
12071206 for (int l = 0 ; l < 8 ; ++ l ) sums [l ] += d * aux32 [l ];
12081207 }
12091208 for (int l = 0 ; l < 8 ; ++ l ) sumf += sums [l ];
@@ -1240,8 +1239,8 @@ void ggml_vec_dot_q4_K_q8_K(int n, float * GGML_RESTRICT s, size_t bs, const voi
12401239 float sumf = 0 ;
12411240
12421241 for (int i = 0 ; i < nb ; ++ i ) {
1243- const float d = y [i ].d * GGML_CPU_FP16_TO_FP32 (x [i ].d );
1244- const float dmin = y [i ].d * GGML_CPU_FP16_TO_FP32 (x [i ].dmin );
1242+ const float d = y [i ].d * GGML_FP16_TO_FP32 (x [i ].d );
1243+ const float dmin = y [i ].d * GGML_FP16_TO_FP32 (x [i ].dmin );
12451244
12461245 int tmp , tmp2 , sumi ;
12471246 __asm__ __volatile__(
@@ -1362,8 +1361,8 @@ void ggml_vec_dot_q4_K_q8_K(int n, float * GGML_RESTRICT s, size_t bs, const voi
13621361
13631362 size_t vl = 8 ;
13641363
1365- const float d = y [i ].d * GGML_CPU_FP16_TO_FP32 (x [i ].d );
1366- const float dmin = y [i ].d * GGML_CPU_FP16_TO_FP32 (x [i ].dmin );
1364+ const float d = y [i ].d * GGML_FP16_TO_FP32 (x [i ].d );
1365+ const float dmin = y [i ].d * GGML_FP16_TO_FP32 (x [i ].dmin );
13671366
13681367 vint16mf2_t q8sums_0 = __riscv_vlse16_v_i16mf2 (y [i ].bsums , 4 , vl );
13691368 vint16mf2_t q8sums_1 = __riscv_vlse16_v_i16mf2 (y [i ].bsums + 1 , 4 , vl );
@@ -1423,8 +1422,8 @@ void ggml_vec_dot_q4_K_q8_K(int n, float * GGML_RESTRICT s, size_t bs, const voi
14231422 break ;
14241423 case 128 :
14251424 for (int i = 0 ; i < nb ; ++ i ) {
1426- const float d = y [i ].d * GGML_CPU_FP16_TO_FP32 (x [i ].d );
1427- const float dmin = y [i ].d * GGML_CPU_FP16_TO_FP32 (x [i ].dmin );
1425+ const float d = y [i ].d * GGML_FP16_TO_FP32 (x [i ].d );
1426+ const float dmin = y [i ].d * GGML_FP16_TO_FP32 (x [i ].dmin );
14281427
14291428 int tmp , tmp2 , sumi ;
14301429 __asm__ __volatile__(
@@ -1581,9 +1580,9 @@ void ggml_vec_dot_q4_K_q8_K(int n, float * GGML_RESTRICT s, size_t bs, const voi
15811580 for (int l = 0 ; l < 8 ; ++ l ) aux32 [l ] += scale * aux16 [l ];
15821581 q8 += 8 ; a += 8 ;
15831582 }
1584- const float d = GGML_CPU_FP16_TO_FP32 (x [i ].d ) * y [i ].d ;
1583+ const float d = GGML_FP16_TO_FP32 (x [i ].d ) * y [i ].d ;
15851584 for (int l = 0 ; l < 8 ; ++ l ) sums [l ] += d * aux32 [l ];
1586- const float dmin = GGML_CPU_FP16_TO_FP32 (x [i ].dmin ) * y [i ].d ;
1585+ const float dmin = GGML_FP16_TO_FP32 (x [i ].dmin ) * y [i ].d ;
15871586 sumf -= dmin * sumi ;
15881587 }
15891588 for (int l = 0 ; l < 8 ; ++ l ) sumf += sums [l ];
@@ -1628,8 +1627,8 @@ void ggml_vec_dot_q5_K_q8_K(int n, float * GGML_RESTRICT s, size_t bs, const voi
16281627 const uint8_t * GGML_RESTRICT hm = x [i ].qh ;
16291628 const int8_t * GGML_RESTRICT q8 = y [i ].qs ;
16301629
1631- const float d = GGML_CPU_FP16_TO_FP32 (x [i ].d ) * y [i ].d ;
1632- const float dmin = GGML_CPU_FP16_TO_FP32 (x [i ].dmin ) * y [i ].d ;
1630+ const float d = GGML_FP16_TO_FP32 (x [i ].d ) * y [i ].d ;
1631+ const float dmin = GGML_FP16_TO_FP32 (x [i ].dmin ) * y [i ].d ;
16331632
16341633 vint16m1_t q8sums_0 = __riscv_vlse16_v_i16m1 (y [i ].bsums , 4 , vl );
16351634 vint16m1_t q8sums_1 = __riscv_vlse16_v_i16m1 (y [i ].bsums + 1 , 4 , vl );
@@ -1750,9 +1749,9 @@ void ggml_vec_dot_q5_K_q8_K(int n, float * GGML_RESTRICT s, size_t bs, const voi
17501749 for (int l = 0 ; l < 8 ; ++ l ) aux32 [l ] += scale * aux16 [l ];
17511750 q8 += 8 ; a += 8 ;
17521751 }
1753- const float d = GGML_CPU_FP16_TO_FP32 (x [i ].d ) * y [i ].d ;
1752+ const float d = GGML_FP16_TO_FP32 (x [i ].d ) * y [i ].d ;
17541753 for (int l = 0 ; l < 8 ; ++ l ) sums [l ] += d * aux32 [l ];
1755- const float dmin = GGML_CPU_FP16_TO_FP32 (x [i ].dmin ) * y [i ].d ;
1754+ const float dmin = GGML_FP16_TO_FP32 (x [i ].dmin ) * y [i ].d ;
17561755 sumf -= dmin * sumi ;
17571756 }
17581757 for (int l = 0 ; l < 8 ; ++ l ) sumf += sums [l ];
@@ -1779,7 +1778,7 @@ void ggml_vec_dot_q6_K_q8_K(int n, float * GGML_RESTRICT s, size_t bs, const voi
17791778
17801779 for (int i = 0 ; i < nb ; ++ i ) {
17811780
1782- const float d = GGML_CPU_FP16_TO_FP32 (x [i ].d ) * y [i ].d ;
1781+ const float d = GGML_FP16_TO_FP32 (x [i ].d ) * y [i ].d ;
17831782
17841783 const uint8_t * restrict q6 = x [i ].ql ;
17851784 const uint8_t * restrict qh = x [i ].qh ;
@@ -1863,7 +1862,7 @@ void ggml_vec_dot_q6_K_q8_K(int n, float * GGML_RESTRICT s, size_t bs, const voi
18631862 case 256 :
18641863 for (int i = 0 ; i < nb ; ++ i ) {
18651864
1866- const float d = GGML_CPU_FP16_TO_FP32 (x [i ].d ) * y [i ].d ;
1865+ const float d = GGML_FP16_TO_FP32 (x [i ].d ) * y [i ].d ;
18671866
18681867 const uint8_t * GGML_RESTRICT q6 = x [i ].ql ;
18691868 const uint8_t * GGML_RESTRICT qh = x [i ].qh ;
@@ -1944,7 +1943,7 @@ void ggml_vec_dot_q6_K_q8_K(int n, float * GGML_RESTRICT s, size_t bs, const voi
19441943 case 128 :
19451944 for (int i = 0 ; i < nb ; ++ i ) {
19461945
1947- const float d = GGML_CPU_FP16_TO_FP32 (x [i ].d ) * y [i ].d ;
1946+ const float d = GGML_FP16_TO_FP32 (x [i ].d ) * y [i ].d ;
19481947
19491948 const uint8_t * restrict q6 = x [i ].ql ;
19501949 const uint8_t * restrict qh = x [i ].qh ;
@@ -2059,7 +2058,7 @@ void ggml_vec_dot_q6_K_q8_K(int n, float * GGML_RESTRICT s, size_t bs, const voi
20592058 for (int l = 0 ; l < 8 ; ++ l ) aux32 [l ] += scale * aux16 [l ];
20602059 q8 += 8 ; a += 8 ;
20612060 }
2062- const float d = GGML_CPU_FP16_TO_FP32 (x [i ].d ) * y [i ].d ;
2061+ const float d = GGML_FP16_TO_FP32 (x [i ].d ) * y [i ].d ;
20632062 for (int l = 0 ; l < 8 ; ++ l ) sums [l ] += d * aux32 [l ];
20642063 }
20652064 for (int l = 0 ; l < 8 ; ++ l ) sumf += sums [l ];
0 commit comments