|
6 | 6 | #include "ggml-impl.h" |
7 | 7 | #include "ggml-cpu.h" |
8 | 8 | #include "ggml-cpu-impl.h" |
9 | | -#include "simd-mappings.h" |
10 | 9 | #include "traits.h" |
11 | 10 |
|
12 | 11 | #include <cmath> |
@@ -52,7 +51,7 @@ void ggml_quantize_mat_q8_0_4x4(const float * GGML_RESTRICT x, void * GGML_RESTR |
52 | 51 | const float d = amax / ((1 << 7) - 1); |
53 | 52 | id[row_iter] = d ? 1.0f / d : 0.0f; |
54 | 53 |
|
55 | | - y[i].d[row_iter] = GGML_CPU_FP32_TO_FP16(d); |
| 54 | + y[i].d[row_iter] = GGML_FP32_TO_FP16(d); |
56 | 55 | } |
57 | 56 |
|
58 | 57 | for (int j = 0; j < 8; j++) { |
@@ -103,7 +102,7 @@ void ggml_quantize_mat_q8_0_4x4(const float * GGML_RESTRICT x, void * GGML_RESTR |
103 | 102 | const float d = amax / ((1 << 7) - 1); |
104 | 103 | id[row_iter] = d ? 1.0f / d : 0.0f; |
105 | 104 |
|
106 | | - y[i].d[row_iter] = GGML_CPU_FP32_TO_FP16(d); |
| 105 | + y[i].d[row_iter] = GGML_FP32_TO_FP16(d); |
107 | 106 | } |
108 | 107 |
|
109 | 108 | for (int j = 0; j < QK8_0 * 4; j++) { |
@@ -146,7 +145,7 @@ void ggml_quantize_mat_q8_0_4x8(const float * GGML_RESTRICT x, void * GGML_RESTR |
146 | 145 | const float d = amax / ((1 << 7) - 1); |
147 | 146 | id[row_iter] = d ? 1.0f / d : 0.0f; |
148 | 147 |
|
149 | | - y[i].d[row_iter] = GGML_CPU_FP32_TO_FP16(d); |
| 148 | + y[i].d[row_iter] = GGML_FP32_TO_FP16(d); |
150 | 149 | } |
151 | 150 |
|
152 | 151 | for (int j = 0; j < 4; j++) { |
@@ -222,7 +221,7 @@ void ggml_quantize_mat_q8_0_4x8(const float * GGML_RESTRICT x, void * GGML_RESTR |
222 | 221 | const float d = amax / ((1 << 7) - 1); |
223 | 222 | id[row_iter] = d ? 1.0f / d : 0.0f; |
224 | 223 |
|
225 | | - y[i].d[row_iter] = GGML_CPU_FP32_TO_FP16(d); |
| 224 | + y[i].d[row_iter] = GGML_FP32_TO_FP16(d); |
226 | 225 | } |
227 | 226 |
|
228 | 227 | for (int j = 0; j < QK8_0 * 4; j++) { |
@@ -312,7 +311,7 @@ void ggml_gemv_q4_0_4x4_q8_0(int n, float * GGML_RESTRICT s, size_t bs, const vo |
312 | 311 | const int v1 = (int8_t) (b_ptr[l].qs[k * ncols_interleaved * blocklen + j * blocklen + i] & 0xF0); |
313 | 312 | sumi += ((v0 * a_ptr[l].qs[k * blocklen + i]) + (v1 * a_ptr[l].qs[k * blocklen + i + qk / 2])) >> 4; |
314 | 313 | } |
315 | | - sumf[j] += sumi * GGML_CPU_FP16_TO_FP32(b_ptr[l].d[j]) * GGML_CPU_FP16_TO_FP32(a_ptr[l].d); |
| 314 | + sumf[j] += sumi * GGML_FP16_TO_FP32(b_ptr[l].d[j]) * GGML_FP16_TO_FP32(a_ptr[l].d); |
316 | 315 | } |
317 | 316 | } |
318 | 317 | } |
@@ -400,7 +399,7 @@ void ggml_gemv_q4_0_4x8_q8_0(int n, float * GGML_RESTRICT s, size_t bs, const vo |
400 | 399 | const int v1 = (int8_t) (b_ptr[l].qs[k * ncols_interleaved * blocklen + j * blocklen + i] & 0xF0); |
401 | 400 | sumi += ((v0 * a_ptr[l].qs[k * blocklen + i]) + (v1 * a_ptr[l].qs[k * blocklen + i + qk / 2])) >> 4; |
402 | 401 | } |
403 | | - sumf[j] += sumi * GGML_CPU_FP16_TO_FP32(b_ptr[l].d[j]) * GGML_CPU_FP16_TO_FP32(a_ptr[l].d); |
| 402 | + sumf[j] += sumi * GGML_FP16_TO_FP32(b_ptr[l].d[j]) * GGML_FP16_TO_FP32(a_ptr[l].d); |
404 | 403 | } |
405 | 404 | } |
406 | 405 | } |
@@ -515,7 +514,7 @@ void ggml_gemv_q4_0_8x8_q8_0(int n, float * GGML_RESTRICT s, size_t bs, const vo |
515 | 514 | const int v1 = (int8_t) (b_ptr[l].qs[k * ncols_interleaved * blocklen + j * blocklen + i] & 0xF0); |
516 | 515 | sumi += ((v0 * a_ptr[l].qs[k * blocklen + i]) + (v1 * a_ptr[l].qs[k * blocklen + i + qk / 2])) >> 4; |
517 | 516 | } |
518 | | - sumf[j] += sumi * GGML_CPU_FP16_TO_FP32(b_ptr[l].d[j]) * GGML_CPU_FP16_TO_FP32(a_ptr[l].d); |
| 517 | + sumf[j] += sumi * GGML_FP16_TO_FP32(b_ptr[l].d[j]) * GGML_FP16_TO_FP32(a_ptr[l].d); |
519 | 518 | } |
520 | 519 | } |
521 | 520 | } |
@@ -609,7 +608,7 @@ void ggml_gemv_iq4_nl_4x4_q8_0(int n, float * GGML_RESTRICT s, size_t bs, const |
609 | 608 | const int v1 = kvalues_iq4nl[b_ptr[l].qs[k * ncols_interleaved * blocklen + j * blocklen + i] >> 4]; |
610 | 609 | sumi += ((v0 * a_ptr[l].qs[k * blocklen + i]) + (v1 * a_ptr[l].qs[k * blocklen + i + qk / 2])); |
611 | 610 | } |
612 | | - sumf[j] += sumi * GGML_CPU_FP16_TO_FP32(b_ptr[l].d[j]) * GGML_CPU_FP16_TO_FP32(a_ptr[l].d); |
| 611 | + sumf[j] += sumi * GGML_FP16_TO_FP32(b_ptr[l].d[j]) * GGML_FP16_TO_FP32(a_ptr[l].d); |
613 | 612 | } |
614 | 613 | } |
615 | 614 | } |
@@ -1118,7 +1117,7 @@ void ggml_gemm_q4_0_4x4_q8_0(int n, float * GGML_RESTRICT s, size_t bs, const vo |
1118 | 1117 | sumi += ((v0 * a_ptr[l].qs[k * 4 * blocklen + m * blocklen + i]) + |
1119 | 1118 | (v1 * a_ptr[l].qs[k * 4 * blocklen + m * blocklen + i + qk / 2 * 4])) >> 4; |
1120 | 1119 | } |
1121 | | - sumf[m][j] += sumi * GGML_CPU_FP16_TO_FP32(b_ptr[l].d[j]) * GGML_CPU_FP16_TO_FP32(a_ptr[l].d[m]); |
| 1120 | + sumf[m][j] += sumi * GGML_FP16_TO_FP32(b_ptr[l].d[j]) * GGML_FP16_TO_FP32(a_ptr[l].d[m]); |
1122 | 1121 | } |
1123 | 1122 | } |
1124 | 1123 | } |
@@ -1571,7 +1570,7 @@ void ggml_gemm_q4_0_4x8_q8_0(int n, float * GGML_RESTRICT s, size_t bs, const vo |
1571 | 1570 | sumi += ((v0 * a_ptr[l].qs[k * 4 * blocklen + m * blocklen + i]) + |
1572 | 1571 | (v1 * a_ptr[l].qs[k * 4 * blocklen + m * blocklen + i + qk / 2 * 4])) >> 4; |
1573 | 1572 | } |
1574 | | - sumf[m][j] += sumi * GGML_CPU_FP16_TO_FP32(b_ptr[l].d[j]) * GGML_CPU_FP16_TO_FP32(a_ptr[l].d[m]); |
| 1573 | + sumf[m][j] += sumi * GGML_FP16_TO_FP32(b_ptr[l].d[j]) * GGML_FP16_TO_FP32(a_ptr[l].d[m]); |
1575 | 1574 | } |
1576 | 1575 | } |
1577 | 1576 | } |
@@ -2040,7 +2039,7 @@ void ggml_gemm_q4_0_8x8_q8_0(int n, float * GGML_RESTRICT s, size_t bs, const vo |
2040 | 2039 | sumi += ((v0 * a_ptr[l].qs[k * 4 * blocklen + m * blocklen + i]) + |
2041 | 2040 | (v1 * a_ptr[l].qs[k * 4 * blocklen + m * blocklen + i + qk / 2 * 4])) >> 4; |
2042 | 2041 | } |
2043 | | - sumf[m][j] += sumi * GGML_CPU_FP16_TO_FP32(b_ptr[l].d[j]) * GGML_CPU_FP16_TO_FP32(a_ptr[l].d[m]); |
| 2042 | + sumf[m][j] += sumi * GGML_FP16_TO_FP32(b_ptr[l].d[j]) * GGML_FP16_TO_FP32(a_ptr[l].d[m]); |
2044 | 2043 | } |
2045 | 2044 | } |
2046 | 2045 | } |
@@ -2148,7 +2147,7 @@ void ggml_gemm_iq4_nl_4x4_q8_0(int n, float * GGML_RESTRICT s, size_t bs, const |
2148 | 2147 | sumi += ((v0 * a_ptr[l].qs[k * 4 * blocklen + m * blocklen + i]) + |
2149 | 2148 | (v1 * a_ptr[l].qs[k * 4 * blocklen + m * blocklen + i + qk / 2 * 4])); |
2150 | 2149 | } |
2151 | | - sumf[m][j] += sumi * GGML_CPU_FP16_TO_FP32(b_ptr[l].d[j]) * GGML_CPU_FP16_TO_FP32(a_ptr[l].d[m]); |
| 2150 | + sumf[m][j] += sumi * GGML_FP16_TO_FP32(b_ptr[l].d[j]) * GGML_FP16_TO_FP32(a_ptr[l].d[m]); |
2152 | 2151 | } |
2153 | 2152 | } |
2154 | 2153 | } |
|
0 commit comments