@@ -35,15 +35,15 @@ void quantize_row_q5_1(const float * GGML_RESTRICT x, void * GGML_RESTRICT y, in
3535 quantize_row_q5_1_ref (x , y , k );
3636}
3737
38- GGML_CPU_NATIVE_IMPL (quantize_row_q8_0 )
// Forwards x, y and k unchanged to quantize_row_q8_0_ref; performs no other work and returns nothing.
// NOTE(review): presumably the portable fallback used when no architecture-specific q8_0 row quantizer is built — confirm against the GGML_CPU_NATIVE_IMPL definition.
3938void quantize_row_q8_0_generic (const float * GGML_RESTRICT x , void * GGML_RESTRICT y , int64_t k ) {
4039 quantize_row_q8_0_ref (x , y , k );
4140}
41+ GGML_CPU_NATIVE_IMPL (quantize_row_q8_0 )
4242
43- GGML_CPU_NATIVE_IMPL (quantize_row_q8_1 )
// Forwards x, y and k unchanged to quantize_row_q8_1_ref; performs no other work and returns nothing.
// NOTE(review): presumably the portable fallback used when no architecture-specific q8_1 row quantizer is built — confirm against the GGML_CPU_NATIVE_IMPL definition.
4443void quantize_row_q8_1_generic (const float * GGML_RESTRICT x , void * GGML_RESTRICT y , int64_t k ) {
4544 quantize_row_q8_1_ref (x , y , k );
4645}
46+ GGML_CPU_NATIVE_IMPL (quantize_row_q8_1 )
4747
4848//
4949// 2-6 bit quantization in super-blocks
@@ -113,7 +113,6 @@ void quantize_row_q8_K(const float * GGML_RESTRICT x, void * GGML_RESTRICT y, in
113113
114114//===================================== Dot products =================================
115115
116- GGML_CPU_NATIVE_IMPL (ggml_vec_dot_q4_0_q8_0 )
117116void ggml_vec_dot_q4_0_q8_0_generic (int n , float * GGML_RESTRICT s , size_t bs , const void * GGML_RESTRICT vx , size_t bx , const void * GGML_RESTRICT vy , size_t by , int nrc ) {
118117 const int qk = QK8_0 ;
119118 const int nb = n / qk ;
@@ -149,9 +148,9 @@ void ggml_vec_dot_q4_0_q8_0_generic(int n, float * GGML_RESTRICT s, size_t bs, c
149148
150149 * s = sumf ;
151150}
151+ GGML_CPU_NATIVE_IMPL (ggml_vec_dot_q4_0_q8_0 )
152152
153153// TODO: add WASM SIMD
154- GGML_CPU_NATIVE_IMPL (ggml_vec_dot_q4_1_q8_1 )
155154void ggml_vec_dot_q4_1_q8_1_generic (int n , float * GGML_RESTRICT s , size_t bs , const void * GGML_RESTRICT vx , size_t bx , const void * GGML_RESTRICT vy , size_t by , int nrc ) {
156155 const int qk = QK8_1 ;
157156 const int nb = n / qk ;
@@ -187,8 +186,8 @@ void ggml_vec_dot_q4_1_q8_1_generic(int n, float * GGML_RESTRICT s, size_t bs, c
187186
188187 * s = sumf ;
189188}
189+ GGML_CPU_NATIVE_IMPL (ggml_vec_dot_q4_1_q8_1 )
190190
191- GGML_CPU_NATIVE_IMPL (ggml_vec_dot_q5_0_q8_0 )
192191void ggml_vec_dot_q5_0_q8_0_generic (int n , float * GGML_RESTRICT s , size_t bs , const void * GGML_RESTRICT vx , size_t bx , const void * GGML_RESTRICT vy , size_t by , int nrc ) {
193192 const int qk = QK8_0 ;
194193 const int nb = n / qk ;
@@ -231,8 +230,8 @@ void ggml_vec_dot_q5_0_q8_0_generic(int n, float * GGML_RESTRICT s, size_t bs, c
231230
232231 * s = sumf ;
233232}
233+ GGML_CPU_NATIVE_IMPL (ggml_vec_dot_q5_0_q8_0 )
234234
235- GGML_CPU_NATIVE_IMPL (ggml_vec_dot_q5_1_q8_1 )
236235void ggml_vec_dot_q5_1_q8_1_generic (int n , float * GGML_RESTRICT s , size_t bs , const void * GGML_RESTRICT vx , size_t bx , const void * GGML_RESTRICT vy , size_t by , int nrc ) {
237236 const int qk = QK8_1 ;
238237 const int nb = n / qk ;
@@ -275,8 +274,8 @@ void ggml_vec_dot_q5_1_q8_1_generic(int n, float * GGML_RESTRICT s, size_t bs, c
275274
276275 * s = sumf ;
277276}
277+ GGML_CPU_NATIVE_IMPL (ggml_vec_dot_q5_1_q8_1 )
278278
279- GGML_CPU_NATIVE_IMPL (ggml_vec_dot_q8_0_q8_0 )
280279void ggml_vec_dot_q8_0_q8_0_generic (int n , float * GGML_RESTRICT s , size_t bs , const void * GGML_RESTRICT vx , size_t bx , const void * GGML_RESTRICT vy , size_t by , int nrc ) {
281280 const int qk = QK8_0 ;
282281 const int nb = n / qk ;
@@ -306,8 +305,8 @@ void ggml_vec_dot_q8_0_q8_0_generic(int n, float * GGML_RESTRICT s, size_t bs, c
306305
307306 * s = sumf ;
308307}
308+ GGML_CPU_NATIVE_IMPL (ggml_vec_dot_q8_0_q8_0 )
309309
310- GGML_CPU_NATIVE_IMPL (ggml_vec_dot_tq1_0_q8_K )
311310void ggml_vec_dot_tq1_0_q8_K_generic (int n , float * GGML_RESTRICT s , size_t bs , const void * GGML_RESTRICT vx , size_t bx , const void * GGML_RESTRICT vy , size_t by , int nrc ) {
312311 assert (nrc == 1 );
313312 UNUSED (nrc );
@@ -359,8 +358,8 @@ void ggml_vec_dot_tq1_0_q8_K_generic(int n, float * GGML_RESTRICT s, size_t bs,
359358
360359 * s = sumf ;
361360}
361+ GGML_CPU_NATIVE_IMPL (ggml_vec_dot_tq1_0_q8_K )
362362
363- GGML_CPU_NATIVE_IMPL (ggml_vec_dot_tq2_0_q8_K )
364363void ggml_vec_dot_tq2_0_q8_K_generic (int n , float * GGML_RESTRICT s , size_t bs , const void * GGML_RESTRICT vx , size_t bx , const void * GGML_RESTRICT vy , size_t by , int nrc ) {
365364 assert (nrc == 1 );
366365 UNUSED (nrc );
@@ -392,8 +391,8 @@ void ggml_vec_dot_tq2_0_q8_K_generic(int n, float * GGML_RESTRICT s, size_t bs,
392391
393392 * s = sumf ;
394393}
394+ GGML_CPU_NATIVE_IMPL (ggml_vec_dot_tq2_0_q8_K )
395395
396- GGML_CPU_NATIVE_IMPL (ggml_vec_dot_q2_K_q8_K )
397396void ggml_vec_dot_q2_K_q8_K_generic (int n , float * GGML_RESTRICT s , size_t bs , const void * GGML_RESTRICT vx , size_t bx , const void * GGML_RESTRICT vy , size_t by , int nrc ) {
398397 assert (nrc == 1 );
399398 UNUSED (nrc );
@@ -445,8 +444,8 @@ void ggml_vec_dot_q2_K_q8_K_generic(int n, float * GGML_RESTRICT s, size_t bs, c
445444 }
446445 * s = sumf ;
447446}
447+ GGML_CPU_NATIVE_IMPL (ggml_vec_dot_q2_K_q8_K )
448448
449- GGML_CPU_NATIVE_IMPL (ggml_vec_dot_q3_K_q8_K )
450449void ggml_vec_dot_q3_K_q8_K_generic (int n , float * GGML_RESTRICT s , size_t bs , const void * GGML_RESTRICT vx , size_t bx , const void * GGML_RESTRICT vy , size_t by , int nrc ) {
451450 assert (n % QK_K == 0 );
452451 assert (nrc == 1 );
@@ -525,8 +524,8 @@ void ggml_vec_dot_q3_K_q8_K_generic(int n, float * GGML_RESTRICT s, size_t bs, c
525524 for (int l = 0 ; l < 8 ; ++ l ) sumf += sums [l ];
526525 * s = sumf ;
527526}
527+ GGML_CPU_NATIVE_IMPL (ggml_vec_dot_q3_K_q8_K )
528528
529- GGML_CPU_NATIVE_IMPL (ggml_vec_dot_q4_K_q8_K )
530529void ggml_vec_dot_q4_K_q8_K_generic (int n , float * GGML_RESTRICT s , size_t bs , const void * GGML_RESTRICT vx , size_t bx , const void * GGML_RESTRICT vy , size_t by , int nrc ) {
531530 assert (n % QK_K == 0 );
532531 assert (nrc == 1 );
@@ -601,8 +600,8 @@ void ggml_vec_dot_q4_K_q8_K_generic(int n, float * GGML_RESTRICT s, size_t bs, c
601600 for (int l = 0 ; l < 8 ; ++ l ) sumf += sums [l ];
602601 * s = sumf ;
603602}
603+ GGML_CPU_NATIVE_IMPL (ggml_vec_dot_q4_K_q8_K )
604604
605- GGML_CPU_NATIVE_IMPL (ggml_vec_dot_q5_K_q8_K )
606605void ggml_vec_dot_q5_K_q8_K_generic (int n , float * GGML_RESTRICT s , size_t bs , const void * GGML_RESTRICT vx , size_t bx , const void * GGML_RESTRICT vy , size_t by , int nrc ) {
607606 assert (n % QK_K == 0 );
608607 assert (nrc == 1 );
@@ -682,8 +681,8 @@ void ggml_vec_dot_q5_K_q8_K_generic(int n, float * GGML_RESTRICT s, size_t bs, c
682681 for (int l = 0 ; l < 8 ; ++ l ) sumf += sums [l ];
683682 * s = sumf ;
684683}
684+ GGML_CPU_NATIVE_IMPL (ggml_vec_dot_q5_K_q8_K )
685685
686- GGML_CPU_NATIVE_IMPL (ggml_vec_dot_q6_K_q8_K )
687686void ggml_vec_dot_q6_K_q8_K_generic (int n , float * GGML_RESTRICT s , size_t bs , const void * GGML_RESTRICT vx , size_t bx , const void * GGML_RESTRICT vy , size_t by , int nrc ) {
688687 assert (n % QK_K == 0 );
689688 assert (nrc == 1 );
@@ -738,8 +737,8 @@ void ggml_vec_dot_q6_K_q8_K_generic(int n, float * GGML_RESTRICT s, size_t bs, c
738737 for (int l = 0 ; l < 8 ; ++ l ) sumf += sums [l ];
739738 * s = sumf ;
740739}
740+ GGML_CPU_NATIVE_IMPL (ggml_vec_dot_q6_K_q8_K )
741741
742- GGML_CPU_NATIVE_IMPL (ggml_vec_dot_iq2_xxs_q8_K )
743742void ggml_vec_dot_iq2_xxs_q8_K_generic (int n , float * GGML_RESTRICT s , size_t bs , const void * GGML_RESTRICT vx , size_t bx , const void * GGML_RESTRICT vy , size_t by , int nrc ) {
744743 assert (n % QK_K == 0 );
745744 assert (nrc == 1 );
@@ -781,8 +780,8 @@ void ggml_vec_dot_iq2_xxs_q8_K_generic(int n, float * GGML_RESTRICT s, size_t bs
781780 }
782781 * s = 0.125f * sumf ;
783782}
783+ GGML_CPU_NATIVE_IMPL (ggml_vec_dot_iq2_xxs_q8_K )
784784
785- GGML_CPU_NATIVE_IMPL (ggml_vec_dot_iq2_xs_q8_K )
786785void ggml_vec_dot_iq2_xs_q8_K_generic (int n , float * GGML_RESTRICT s , size_t bs , const void * GGML_RESTRICT vx , size_t bx , const void * GGML_RESTRICT vy , size_t by , int nrc ) {
787786 assert (n % QK_K == 0 );
788787 assert (nrc == 1 );
@@ -832,8 +831,8 @@ void ggml_vec_dot_iq2_xs_q8_K_generic(int n, float * GGML_RESTRICT s, size_t bs,
832831 }
833832 * s = 0.125f * sumf ;
834833}
834+ GGML_CPU_NATIVE_IMPL (ggml_vec_dot_iq2_xs_q8_K )
835835
836- GGML_CPU_NATIVE_IMPL (ggml_vec_dot_iq2_s_q8_K )
837836void ggml_vec_dot_iq2_s_q8_K_generic (int n , float * GGML_RESTRICT s , size_t bs , const void * GGML_RESTRICT vx , size_t bx , const void * GGML_RESTRICT vy , size_t by , int nrc ) {
838837 assert (n % QK_K == 0 );
839838 assert (nrc == 1 );
@@ -885,8 +884,8 @@ void ggml_vec_dot_iq2_s_q8_K_generic(int n, float * GGML_RESTRICT s, size_t bs,
885884
886885 * s = 0.125f * sumf ;
887886}
887+ GGML_CPU_NATIVE_IMPL (ggml_vec_dot_iq2_s_q8_K )
888888
889- GGML_CPU_NATIVE_IMPL (ggml_vec_dot_iq3_xxs_q8_K )
890889void ggml_vec_dot_iq3_xxs_q8_K_generic (int n , float * GGML_RESTRICT s , size_t bs , const void * GGML_RESTRICT vx , size_t bx , const void * GGML_RESTRICT vy , size_t by , int nrc ) {
891890 assert (n % QK_K == 0 );
892891 assert (nrc == 1 );
@@ -930,8 +929,8 @@ void ggml_vec_dot_iq3_xxs_q8_K_generic(int n, float * GGML_RESTRICT s, size_t bs
930929 }
931930 * s = 0.25f * sumf ;
932931}
932+ GGML_CPU_NATIVE_IMPL (ggml_vec_dot_iq3_xxs_q8_K )
933933
934- GGML_CPU_NATIVE_IMPL (ggml_vec_dot_iq3_s_q8_K )
935934void ggml_vec_dot_iq3_s_q8_K_generic (int n , float * GGML_RESTRICT s , size_t bs , const void * GGML_RESTRICT vx , size_t bx , const void * GGML_RESTRICT vy , size_t by , int nrc ) {
936935 assert (n % QK_K == 0 );
937936 assert (nrc == 1 );
@@ -987,8 +986,8 @@ void ggml_vec_dot_iq3_s_q8_K_generic(int n, float * GGML_RESTRICT s, size_t bs,
987986 }
988987 * s = sumf ;
989988}
989+ GGML_CPU_NATIVE_IMPL (ggml_vec_dot_iq3_s_q8_K )
990990
991- GGML_CPU_NATIVE_IMPL (ggml_vec_dot_iq1_s_q8_K )
992991void ggml_vec_dot_iq1_s_q8_K_generic (int n , float * GGML_RESTRICT s , size_t bs , const void * GGML_RESTRICT vx , size_t bx , const void * GGML_RESTRICT vy , size_t by , int nrc ) {
993992 assert (n % QK_K == 0 );
994993 assert (nrc == 1 );
@@ -1031,8 +1030,8 @@ void ggml_vec_dot_iq1_s_q8_K_generic(int n, float * GGML_RESTRICT s, size_t bs,
10311030
10321031 * s = sumf ;
10331032}
1033+ GGML_CPU_NATIVE_IMPL (ggml_vec_dot_iq1_s_q8_K )
10341034
1035- GGML_CPU_NATIVE_IMPL (ggml_vec_dot_iq1_m_q8_K )
10361035void ggml_vec_dot_iq1_m_q8_K_generic (int n , float * GGML_RESTRICT s , size_t bs , const void * GGML_RESTRICT vx , size_t bx , const void * GGML_RESTRICT vy , size_t by , int nrc ) {
10371036 assert (n % QK_K == 0 );
10381037 assert (nrc == 1 );
@@ -1093,8 +1092,8 @@ void ggml_vec_dot_iq1_m_q8_K_generic(int n, float * GGML_RESTRICT s, size_t bs,
10931092
10941093 * s = sumf ;
10951094}
1095+ GGML_CPU_NATIVE_IMPL (ggml_vec_dot_iq1_m_q8_K )
10961096
1097- GGML_CPU_NATIVE_IMPL (ggml_vec_dot_iq4_nl_q8_0 )
10981097void ggml_vec_dot_iq4_nl_q8_0_generic (int n , float * GGML_RESTRICT s , size_t bs , const void * GGML_RESTRICT vx , size_t bx , const void * GGML_RESTRICT vy , size_t by , int nrc ) {
10991098 assert (nrc == 1 );
11001099 UNUSED (nrc );
@@ -1123,8 +1122,8 @@ void ggml_vec_dot_iq4_nl_q8_0_generic(int n, float * GGML_RESTRICT s, size_t bs,
11231122 }
11241123 * s = sumf ;
11251124}
1125+ GGML_CPU_NATIVE_IMPL (ggml_vec_dot_iq4_nl_q8_0 )
11261126
1127- GGML_CPU_NATIVE_IMPL (ggml_vec_dot_iq4_xs_q8_K )
11281127void ggml_vec_dot_iq4_xs_q8_K_generic (int n , float * GGML_RESTRICT s , size_t bs , const void * GGML_RESTRICT vx , size_t bx , const void * GGML_RESTRICT vy , size_t by , int nrc ) {
11291128 assert (nrc == 1 );
11301129 UNUSED (nrc );
@@ -1170,6 +1169,7 @@ void ggml_vec_dot_iq4_xs_q8_K_generic(int n, float * GGML_RESTRICT s, size_t bs,
11701169 }
11711170 * s = sumf ;
11721171}
1172+ GGML_CPU_NATIVE_IMPL (ggml_vec_dot_iq4_xs_q8_K )
11731173
11741174// ============================ 4-bit non-linear quants
11751175
0 commit comments