@@ -52,7 +52,7 @@ void ggml_quantize_mat_q8_0_4x4(const float * GGML_RESTRICT x, void * GGML_RESTR
5252 const float d = amax / ((1 << 7 ) - 1 );
5353 id[row_iter] = d ? 1 .0f / d : 0 .0f ;
5454
55- y[i].d [row_iter] = GGML_FP32_TO_FP16 (d);
55+ y[i].d [row_iter] = GGML_CPU_FP32_TO_FP16 (d);
5656 }
5757
5858 for (int j = 0 ; j < 8 ; j++) {
@@ -103,7 +103,7 @@ void ggml_quantize_mat_q8_0_4x4(const float * GGML_RESTRICT x, void * GGML_RESTR
103103 const float d = amax / ((1 << 7 ) - 1 );
104104 id[row_iter] = d ? 1 .0f / d : 0 .0f ;
105105
106- y[i].d [row_iter] = GGML_FP32_TO_FP16 (d);
106+ y[i].d [row_iter] = GGML_CPU_FP32_TO_FP16 (d);
107107 }
108108
109109 for (int j = 0 ; j < QK8_0 * 4 ; j++) {
@@ -146,7 +146,7 @@ void ggml_quantize_mat_q8_0_4x8(const float * GGML_RESTRICT x, void * GGML_RESTR
146146 const float d = amax / ((1 << 7 ) - 1 );
147147 id[row_iter] = d ? 1 .0f / d : 0 .0f ;
148148
149- y[i].d [row_iter] = GGML_FP32_TO_FP16 (d);
149+ y[i].d [row_iter] = GGML_CPU_FP32_TO_FP16 (d);
150150 }
151151
152152 for (int j = 0 ; j < 4 ; j++) {
@@ -222,7 +222,7 @@ void ggml_quantize_mat_q8_0_4x8(const float * GGML_RESTRICT x, void * GGML_RESTR
222222 const float d = amax / ((1 << 7 ) - 1 );
223223 id[row_iter] = d ? 1 .0f / d : 0 .0f ;
224224
225- y[i].d [row_iter] = GGML_FP32_TO_FP16 (d);
225+ y[i].d [row_iter] = GGML_CPU_FP32_TO_FP16 (d);
226226 }
227227
228228 for (int j = 0 ; j < QK8_0 * 4 ; j++) {
@@ -312,7 +312,7 @@ void ggml_gemv_q4_0_4x4_q8_0(int n, float * GGML_RESTRICT s, size_t bs, const vo
312312 const int v1 = (int8_t ) (b_ptr[l].qs [k * ncols_interleaved * blocklen + j * blocklen + i] & 0xF0 );
313313 sumi += ((v0 * a_ptr[l].qs [k * blocklen + i]) + (v1 * a_ptr[l].qs [k * blocklen + i + qk / 2 ])) >> 4 ;
314314 }
315- sumf[j] += sumi * GGML_FP16_TO_FP32 (b_ptr[l].d [j]) * GGML_FP16_TO_FP32 (a_ptr[l].d );
315+ sumf[j] += sumi * GGML_CPU_FP16_TO_FP32 (b_ptr[l].d [j]) * GGML_CPU_FP16_TO_FP32 (a_ptr[l].d );
316316 }
317317 }
318318 }
@@ -400,7 +400,7 @@ void ggml_gemv_q4_0_4x8_q8_0(int n, float * GGML_RESTRICT s, size_t bs, const vo
400400 const int v1 = (int8_t ) (b_ptr[l].qs [k * ncols_interleaved * blocklen + j * blocklen + i] & 0xF0 );
401401 sumi += ((v0 * a_ptr[l].qs [k * blocklen + i]) + (v1 * a_ptr[l].qs [k * blocklen + i + qk / 2 ])) >> 4 ;
402402 }
403- sumf[j] += sumi * GGML_FP16_TO_FP32 (b_ptr[l].d [j]) * GGML_FP16_TO_FP32 (a_ptr[l].d );
403+ sumf[j] += sumi * GGML_CPU_FP16_TO_FP32 (b_ptr[l].d [j]) * GGML_CPU_FP16_TO_FP32 (a_ptr[l].d );
404404 }
405405 }
406406 }
@@ -515,7 +515,7 @@ void ggml_gemv_q4_0_8x8_q8_0(int n, float * GGML_RESTRICT s, size_t bs, const vo
515515 const int v1 = (int8_t ) (b_ptr[l].qs [k * ncols_interleaved * blocklen + j * blocklen + i] & 0xF0 );
516516 sumi += ((v0 * a_ptr[l].qs [k * blocklen + i]) + (v1 * a_ptr[l].qs [k * blocklen + i + qk / 2 ])) >> 4 ;
517517 }
518- sumf[j] += sumi * GGML_FP16_TO_FP32 (b_ptr[l].d [j]) * GGML_FP16_TO_FP32 (a_ptr[l].d );
518+ sumf[j] += sumi * GGML_CPU_FP16_TO_FP32 (b_ptr[l].d [j]) * GGML_CPU_FP16_TO_FP32 (a_ptr[l].d );
519519 }
520520 }
521521 }
@@ -609,7 +609,7 @@ void ggml_gemv_iq4_nl_4x4_q8_0(int n, float * GGML_RESTRICT s, size_t bs, const
609609 const int v1 = kvalues_iq4nl[b_ptr[l].qs [k * ncols_interleaved * blocklen + j * blocklen + i] >> 4 ];
610610 sumi += ((v0 * a_ptr[l].qs [k * blocklen + i]) + (v1 * a_ptr[l].qs [k * blocklen + i + qk / 2 ]));
611611 }
612- sumf[j] += sumi * GGML_FP16_TO_FP32 (b_ptr[l].d [j]) * GGML_FP16_TO_FP32 (a_ptr[l].d );
612+ sumf[j] += sumi * GGML_CPU_FP16_TO_FP32 (b_ptr[l].d [j]) * GGML_CPU_FP16_TO_FP32 (a_ptr[l].d );
613613 }
614614 }
615615 }
@@ -1118,7 +1118,7 @@ void ggml_gemm_q4_0_4x4_q8_0(int n, float * GGML_RESTRICT s, size_t bs, const vo
11181118 sumi += ((v0 * a_ptr[l].qs [k * 4 * blocklen + m * blocklen + i]) +
11191119 (v1 * a_ptr[l].qs [k * 4 * blocklen + m * blocklen + i + qk / 2 * 4 ])) >> 4 ;
11201120 }
1121- sumf[m][j] += sumi * GGML_FP16_TO_FP32 (b_ptr[l].d [j]) * GGML_FP16_TO_FP32 (a_ptr[l].d [m]);
1121+ sumf[m][j] += sumi * GGML_CPU_FP16_TO_FP32 (b_ptr[l].d [j]) * GGML_CPU_FP16_TO_FP32 (a_ptr[l].d [m]);
11221122 }
11231123 }
11241124 }
@@ -1571,7 +1571,7 @@ void ggml_gemm_q4_0_4x8_q8_0(int n, float * GGML_RESTRICT s, size_t bs, const vo
15711571 sumi += ((v0 * a_ptr[l].qs [k * 4 * blocklen + m * blocklen + i]) +
15721572 (v1 * a_ptr[l].qs [k * 4 * blocklen + m * blocklen + i + qk / 2 * 4 ])) >> 4 ;
15731573 }
1574- sumf[m][j] += sumi * GGML_FP16_TO_FP32 (b_ptr[l].d [j]) * GGML_FP16_TO_FP32 (a_ptr[l].d [m]);
1574+ sumf[m][j] += sumi * GGML_CPU_FP16_TO_FP32 (b_ptr[l].d [j]) * GGML_CPU_FP16_TO_FP32 (a_ptr[l].d [m]);
15751575 }
15761576 }
15771577 }
@@ -2040,7 +2040,7 @@ void ggml_gemm_q4_0_8x8_q8_0(int n, float * GGML_RESTRICT s, size_t bs, const vo
20402040 sumi += ((v0 * a_ptr[l].qs [k * 4 * blocklen + m * blocklen + i]) +
20412041 (v1 * a_ptr[l].qs [k * 4 * blocklen + m * blocklen + i + qk / 2 * 4 ])) >> 4 ;
20422042 }
2043- sumf[m][j] += sumi * GGML_FP16_TO_FP32 (b_ptr[l].d [j]) * GGML_FP16_TO_FP32 (a_ptr[l].d [m]);
2043+ sumf[m][j] += sumi * GGML_CPU_FP16_TO_FP32 (b_ptr[l].d [j]) * GGML_CPU_FP16_TO_FP32 (a_ptr[l].d [m]);
20442044 }
20452045 }
20462046 }
@@ -2148,7 +2148,7 @@ void ggml_gemm_iq4_nl_4x4_q8_0(int n, float * GGML_RESTRICT s, size_t bs, const
21482148 sumi += ((v0 * a_ptr[l].qs [k * 4 * blocklen + m * blocklen + i]) +
21492149 (v1 * a_ptr[l].qs [k * 4 * blocklen + m * blocklen + i + qk / 2 * 4 ]));
21502150 }
2151- sumf[m][j] += sumi * GGML_FP16_TO_FP32 (b_ptr[l].d [j]) * GGML_FP16_TO_FP32 (a_ptr[l].d [m]);
2151+ sumf[m][j] += sumi * GGML_CPU_FP16_TO_FP32 (b_ptr[l].d [j]) * GGML_CPU_FP16_TO_FP32 (a_ptr[l].d [m]);
21522152 }
21532153 }
21542154 }
0 commit comments