| 
5 | 5 | #include "ggml-quants.h"  | 
6 | 6 | #include "quants.h"  | 
7 | 7 | 
 
  | 
8 |  | -#if defined(__APPLE__)  | 
9 |  | -#include "apple-fallback.h"  | 
10 |  | -#endif  | 
 | 8 | +#include "arch-fallback.h"  | 
11 | 9 | 
 
  | 
12 | 10 | #include <string.h>  | 
13 | 11 | #include <assert.h>  | 
@@ -42,12 +40,10 @@ void quantize_row_q5_1(const float * GGML_RESTRICT x, void * GGML_RESTRICT y, in  | 
42 | 40 | void quantize_row_q8_0_generic(const float * GGML_RESTRICT x, void * GGML_RESTRICT y, int64_t k) {  | 
43 | 41 |     quantize_row_q8_0_ref(x, y, k);  | 
44 | 42 | }  | 
45 |  | -GGML_CPU_NATIVE_IMPL(quantize_row_q8_0)  | 
46 | 43 | 
 
  | 
47 | 44 | void quantize_row_q8_1_generic(const float * GGML_RESTRICT x, void * GGML_RESTRICT y, int64_t k) {  | 
48 | 45 |     quantize_row_q8_1_ref(x, y, k);  | 
49 | 46 | }  | 
50 |  | -GGML_CPU_NATIVE_IMPL(quantize_row_q8_1)  | 
51 | 47 | 
 
  | 
52 | 48 | //  | 
53 | 49 | // 2-6 bit quantization in super-blocks  | 
@@ -108,7 +104,6 @@ void quantize_row_tq2_0(const float * GGML_RESTRICT x, void * GGML_RESTRICT vy,  | 
108 | 104 | void quantize_row_q8_K_generic(const float * GGML_RESTRICT x, void * GGML_RESTRICT y, int64_t k) {  | 
109 | 105 |     quantize_row_q8_K_ref(x, y, k);  | 
110 | 106 | }  | 
111 |  | -GGML_CPU_NATIVE_IMPL(quantize_row_q8_K)  | 
112 | 107 | 
 
  | 
113 | 108 | //===================================== Dot products =================================  | 
114 | 109 | 
 
  | 
@@ -147,7 +142,6 @@ void ggml_vec_dot_q4_0_q8_0_generic(int n, float * GGML_RESTRICT s, size_t bs, c  | 
147 | 142 | 
 
  | 
148 | 143 |     *s = sumf;  | 
149 | 144 | }  | 
150 |  | -GGML_CPU_NATIVE_IMPL(ggml_vec_dot_q4_0_q8_0)  | 
151 | 145 | 
 
  | 
152 | 146 | // TODO: add WASM SIMD  | 
153 | 147 | void ggml_vec_dot_q4_1_q8_1_generic(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, size_t bx, const void * GGML_RESTRICT vy, size_t by, int nrc) {  | 
@@ -185,7 +179,6 @@ void ggml_vec_dot_q4_1_q8_1_generic(int n, float * GGML_RESTRICT s, size_t bs, c  | 
185 | 179 | 
 
  | 
186 | 180 |     *s = sumf;  | 
187 | 181 | }  | 
188 |  | -GGML_CPU_NATIVE_IMPL(ggml_vec_dot_q4_1_q8_1)  | 
189 | 182 | 
 
  | 
190 | 183 | void ggml_vec_dot_q5_0_q8_0_generic(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, size_t bx, const void * GGML_RESTRICT vy, size_t by, int nrc) {  | 
191 | 184 |     const int qk = QK8_0;  | 
@@ -229,7 +222,6 @@ void ggml_vec_dot_q5_0_q8_0_generic(int n, float * GGML_RESTRICT s, size_t bs, c  | 
229 | 222 | 
 
  | 
230 | 223 |     *s = sumf;  | 
231 | 224 | }  | 
232 |  | -GGML_CPU_NATIVE_IMPL(ggml_vec_dot_q5_0_q8_0)  | 
233 | 225 | 
 
  | 
234 | 226 | void ggml_vec_dot_q5_1_q8_1_generic(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, size_t bx, const void * GGML_RESTRICT vy, size_t by, int nrc) {  | 
235 | 227 |     const int qk = QK8_1;  | 
@@ -273,7 +265,6 @@ void ggml_vec_dot_q5_1_q8_1_generic(int n, float * GGML_RESTRICT s, size_t bs, c  | 
273 | 265 | 
 
  | 
274 | 266 |     *s = sumf;  | 
275 | 267 | }  | 
276 |  | -GGML_CPU_NATIVE_IMPL(ggml_vec_dot_q5_1_q8_1)  | 
277 | 268 | 
 
  | 
278 | 269 | void ggml_vec_dot_q8_0_q8_0_generic(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, size_t bx, const void * GGML_RESTRICT vy, size_t by, int nrc) {  | 
279 | 270 |     const int qk = QK8_0;  | 
@@ -304,7 +295,6 @@ void ggml_vec_dot_q8_0_q8_0_generic(int n, float * GGML_RESTRICT s, size_t bs, c  | 
304 | 295 | 
 
  | 
305 | 296 |     *s = sumf;  | 
306 | 297 | }  | 
307 |  | -GGML_CPU_NATIVE_IMPL(ggml_vec_dot_q8_0_q8_0)  | 
308 | 298 | 
 
  | 
309 | 299 | void ggml_vec_dot_tq1_0_q8_K_generic(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, size_t bx, const void * GGML_RESTRICT vy, size_t by, int nrc) {  | 
310 | 300 |     assert(nrc == 1);  | 
@@ -357,7 +347,6 @@ void ggml_vec_dot_tq1_0_q8_K_generic(int n, float * GGML_RESTRICT s, size_t bs,  | 
357 | 347 | 
 
  | 
358 | 348 |     *s = sumf;  | 
359 | 349 | }  | 
360 |  | -GGML_CPU_NATIVE_IMPL(ggml_vec_dot_tq1_0_q8_K)  | 
361 | 350 | 
 
  | 
362 | 351 | void ggml_vec_dot_tq2_0_q8_K_generic(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, size_t bx, const void * GGML_RESTRICT vy, size_t by, int nrc) {  | 
363 | 352 |     assert(nrc == 1);  | 
@@ -390,7 +379,6 @@ void ggml_vec_dot_tq2_0_q8_K_generic(int n, float * GGML_RESTRICT s, size_t bs,  | 
390 | 379 | 
 
  | 
391 | 380 |     *s = sumf;  | 
392 | 381 | }  | 
393 |  | -GGML_CPU_NATIVE_IMPL(ggml_vec_dot_tq2_0_q8_K)  | 
394 | 382 | 
 
  | 
395 | 383 | void ggml_vec_dot_q2_K_q8_K_generic(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, size_t bx, const void * GGML_RESTRICT vy, size_t by, int nrc) {  | 
396 | 384 |     assert(nrc == 1);  | 
@@ -443,7 +431,6 @@ void ggml_vec_dot_q2_K_q8_K_generic(int n, float * GGML_RESTRICT s, size_t bs, c  | 
443 | 431 |     }  | 
444 | 432 |     *s = sumf;  | 
445 | 433 | }  | 
446 |  | -GGML_CPU_NATIVE_IMPL(ggml_vec_dot_q2_K_q8_K)  | 
447 | 434 | 
 
  | 
448 | 435 | void ggml_vec_dot_q3_K_q8_K_generic(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, size_t bx, const void * GGML_RESTRICT vy, size_t by, int nrc) {  | 
449 | 436 |     assert(n % QK_K == 0);  | 
@@ -523,7 +510,6 @@ void ggml_vec_dot_q3_K_q8_K_generic(int n, float * GGML_RESTRICT s, size_t bs, c  | 
523 | 510 |     for (int l = 0; l < 8; ++l) sumf += sums[l];  | 
524 | 511 |     *s = sumf;  | 
525 | 512 | }  | 
526 |  | -GGML_CPU_NATIVE_IMPL(ggml_vec_dot_q3_K_q8_K)  | 
527 | 513 | 
 
  | 
528 | 514 | void ggml_vec_dot_q4_K_q8_K_generic(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, size_t bx, const void * GGML_RESTRICT vy, size_t by, int nrc) {  | 
529 | 515 |     assert(n % QK_K == 0);  | 
@@ -599,7 +585,6 @@ void ggml_vec_dot_q4_K_q8_K_generic(int n, float * GGML_RESTRICT s, size_t bs, c  | 
599 | 585 |     for (int l = 0; l < 8; ++l) sumf += sums[l];  | 
600 | 586 |     *s = sumf;  | 
601 | 587 | }  | 
602 |  | -GGML_CPU_NATIVE_IMPL(ggml_vec_dot_q4_K_q8_K)  | 
603 | 588 | 
 
  | 
604 | 589 | void ggml_vec_dot_q5_K_q8_K_generic(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, size_t bx, const void * GGML_RESTRICT vy,  size_t by, int nrc) {  | 
605 | 590 |     assert(n % QK_K == 0);  | 
@@ -680,7 +665,6 @@ void ggml_vec_dot_q5_K_q8_K_generic(int n, float * GGML_RESTRICT s, size_t bs, c  | 
680 | 665 |     for (int l = 0; l < 8; ++l) sumf += sums[l];  | 
681 | 666 |     *s = sumf;  | 
682 | 667 | }  | 
683 |  | -GGML_CPU_NATIVE_IMPL(ggml_vec_dot_q5_K_q8_K)  | 
684 | 668 | 
 
  | 
685 | 669 | void ggml_vec_dot_q6_K_q8_K_generic(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, size_t bx, const void * GGML_RESTRICT vy, size_t by, int nrc) {  | 
686 | 670 |     assert(n % QK_K == 0);  | 
@@ -736,7 +720,6 @@ void ggml_vec_dot_q6_K_q8_K_generic(int n, float * GGML_RESTRICT s, size_t bs, c  | 
736 | 720 |     for (int l = 0; l < 8; ++l) sumf += sums[l];  | 
737 | 721 |     *s = sumf;  | 
738 | 722 | }  | 
739 |  | -GGML_CPU_NATIVE_IMPL(ggml_vec_dot_q6_K_q8_K)  | 
740 | 723 | 
 
  | 
741 | 724 | void ggml_vec_dot_iq2_xxs_q8_K_generic(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, size_t bx, const void * GGML_RESTRICT vy, size_t by, int nrc) {  | 
742 | 725 |     assert(n % QK_K == 0);  | 
@@ -779,7 +762,6 @@ void ggml_vec_dot_iq2_xxs_q8_K_generic(int n, float * GGML_RESTRICT s, size_t bs  | 
779 | 762 |     }  | 
780 | 763 |     *s = 0.125f * sumf;  | 
781 | 764 | }  | 
782 |  | -GGML_CPU_NATIVE_IMPL(ggml_vec_dot_iq2_xxs_q8_K)  | 
783 | 765 | 
 
  | 
784 | 766 | void ggml_vec_dot_iq2_xs_q8_K_generic(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, size_t bx, const void * GGML_RESTRICT vy, size_t by, int nrc) {  | 
785 | 767 |     assert(n % QK_K == 0);  | 
@@ -830,7 +812,6 @@ void ggml_vec_dot_iq2_xs_q8_K_generic(int n, float * GGML_RESTRICT s, size_t bs,  | 
830 | 812 |     }  | 
831 | 813 |     *s = 0.125f * sumf;  | 
832 | 814 | }  | 
833 |  | -GGML_CPU_NATIVE_IMPL(ggml_vec_dot_iq2_xs_q8_K)  | 
834 | 815 | 
 
  | 
835 | 816 | void ggml_vec_dot_iq2_s_q8_K_generic(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, size_t bx, const void * GGML_RESTRICT vy, size_t by, int nrc) {  | 
836 | 817 |     assert(n % QK_K == 0);  | 
@@ -883,7 +864,6 @@ void ggml_vec_dot_iq2_s_q8_K_generic(int n, float * GGML_RESTRICT s, size_t bs,  | 
883 | 864 | 
 
  | 
884 | 865 |     *s = 0.125f * sumf;  | 
885 | 866 | }  | 
886 |  | -GGML_CPU_NATIVE_IMPL(ggml_vec_dot_iq2_s_q8_K)  | 
887 | 867 | 
 
  | 
888 | 868 | void ggml_vec_dot_iq3_xxs_q8_K_generic(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, size_t bx, const void * GGML_RESTRICT vy, size_t by, int nrc) {  | 
889 | 869 |     assert(n % QK_K == 0);  | 
@@ -928,7 +908,6 @@ void ggml_vec_dot_iq3_xxs_q8_K_generic(int n, float * GGML_RESTRICT s, size_t bs  | 
928 | 908 |     }  | 
929 | 909 |     *s = 0.25f * sumf;  | 
930 | 910 | }  | 
931 |  | -GGML_CPU_NATIVE_IMPL(ggml_vec_dot_iq3_xxs_q8_K)  | 
932 | 911 | 
 
  | 
933 | 912 | void ggml_vec_dot_iq3_s_q8_K_generic(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, size_t bx, const void * GGML_RESTRICT vy, size_t by, int nrc) {  | 
934 | 913 |     assert(n % QK_K == 0);  | 
@@ -985,7 +964,6 @@ void ggml_vec_dot_iq3_s_q8_K_generic(int n, float * GGML_RESTRICT s, size_t bs,  | 
985 | 964 |     }  | 
986 | 965 |     *s = sumf;  | 
987 | 966 | }  | 
988 |  | -GGML_CPU_NATIVE_IMPL(ggml_vec_dot_iq3_s_q8_K)  | 
989 | 967 | 
 
  | 
990 | 968 | void ggml_vec_dot_iq1_s_q8_K_generic(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, size_t bx, const void * GGML_RESTRICT vy, size_t by, int nrc) {  | 
991 | 969 |     assert(n % QK_K == 0);  | 
@@ -1029,7 +1007,6 @@ void ggml_vec_dot_iq1_s_q8_K_generic(int n, float * GGML_RESTRICT s, size_t bs,  | 
1029 | 1007 | 
 
  | 
1030 | 1008 |     *s = sumf;  | 
1031 | 1009 | }  | 
1032 |  | -GGML_CPU_NATIVE_IMPL(ggml_vec_dot_iq1_s_q8_K)  | 
1033 | 1010 | 
 
  | 
1034 | 1011 | void ggml_vec_dot_iq1_m_q8_K_generic(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, size_t bx, const void * GGML_RESTRICT vy, size_t by, int nrc) {  | 
1035 | 1012 |     assert(n % QK_K == 0);  | 
@@ -1091,7 +1068,6 @@ void ggml_vec_dot_iq1_m_q8_K_generic(int n, float * GGML_RESTRICT s, size_t bs,  | 
1091 | 1068 | 
 
  | 
1092 | 1069 |     *s = sumf;  | 
1093 | 1070 | }  | 
1094 |  | -GGML_CPU_NATIVE_IMPL(ggml_vec_dot_iq1_m_q8_K)  | 
1095 | 1071 | 
 
  | 
1096 | 1072 | void ggml_vec_dot_iq4_nl_q8_0_generic(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, size_t bx, const void * GGML_RESTRICT vy, size_t by, int nrc) {  | 
1097 | 1073 |     assert(nrc == 1);  | 
@@ -1121,7 +1097,6 @@ void ggml_vec_dot_iq4_nl_q8_0_generic(int n, float * GGML_RESTRICT s, size_t bs,  | 
1121 | 1097 |     }  | 
1122 | 1098 |     *s = sumf;  | 
1123 | 1099 | }  | 
1124 |  | -GGML_CPU_NATIVE_IMPL(ggml_vec_dot_iq4_nl_q8_0)  | 
1125 | 1100 | 
 
  | 
1126 | 1101 | void ggml_vec_dot_iq4_xs_q8_K_generic(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, size_t bx, const void * GGML_RESTRICT vy, size_t by, int nrc) {  | 
1127 | 1102 |     assert(nrc == 1);  | 
@@ -1168,7 +1143,6 @@ void ggml_vec_dot_iq4_xs_q8_K_generic(int n, float * GGML_RESTRICT s, size_t bs,  | 
1168 | 1143 |     }  | 
1169 | 1144 |     *s = sumf;  | 
1170 | 1145 | }  | 
1171 |  | -GGML_CPU_NATIVE_IMPL(ggml_vec_dot_iq4_xs_q8_K)  | 
1172 | 1146 | 
 
  | 
1173 | 1147 | // ============================ 4-bit non-linear quants  | 
1174 | 1148 | 
 
  | 
 | 
0 commit comments