
Commit a02b360

ggml-cpu: rename all fp16<->fp32 macros to prefix with ggml_cpu

ref: #14317 (comment)
Signed-off-by: Aaron Teo <[email protected]>

1 parent 64568ff
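
The change is mechanical and identical across the 20 files: every call site switches from the shared GGML_FP16_TO_FP32 / GGML_FP32_TO_FP16 macros to backend-local GGML_CPU_* names, letting the CPU backend pick per-architecture conversions without touching the public helpers. A minimal sketch of what the renamed pair can look like, assuming the fallback delegates to ggml's scalar compute macros (the commit's actual per-architecture definitions may differ):

```c
// Sketch only, not the commit's actual definitions: backend-local names for
// the fp16 <-> fp32 conversions. A per-architecture header can #define these
// to native conversion instructions first; otherwise they fall back to
// ggml's scalar compute helpers (an assumption here).
#ifndef GGML_CPU_FP16_TO_FP32
#define GGML_CPU_FP16_TO_FP32(x) GGML_COMPUTE_FP16_TO_FP32(x)
#endif

#ifndef GGML_CPU_FP32_TO_FP16
#define GGML_CPU_FP32_TO_FP16(x) GGML_COMPUTE_FP32_TO_FP16(x)
#endif
```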

File tree: 20 files changed (+666, -735 lines)

ggml/src/ggml-cpu/amx/mmq.cpp (9 additions, 9 deletions)

@@ -454,7 +454,7 @@ void quantize_row_q8_K_vnni(const float * RESTRICT x, void * RESTRICT vy, int64_
 
         // Quantize these floats
         const float iscale = 127.f / amax;
-        y[i].d = GGML_FP32_TO_FP16(1 / iscale);
+        y[i].d = GGML_CPU_FP32_TO_FP16(1 / iscale);
         const float id = ( amax != 0.0f ) ? iscale : 0.f;
         const __m512 vscale = _mm512_set1_ps(id);
 
@@ -1091,7 +1091,7 @@ struct acc_C<block_q8_0, block_q4_0, is_acc> {
         const __m512 vd0 = _mm512_cvtph_ps(_mm256_loadu_si256((const __m256i *)((const char *)packed_B + offset)));
 
         for (int m = 0; m < nr; ++m) {
-            const __m512 vd1 = _mm512_set1_ps(GGML_FP16_TO_FP32(A[m * lda].d));
+            const __m512 vd1 = _mm512_set1_ps(GGML_CPU_FP16_TO_FP32(A[m * lda].d));
             const __m512 vtile = _mm512_cvtepi32_ps(_mm512_loadu_si512(tile + m * TILE_N));
 
             __m512 vsum;
@@ -1114,8 +1114,8 @@ struct acc_C<block_q8_1, block_q4_1, is_acc> {
         const __m512 vm0 = _mm512_cvtph_ps(_mm256_loadu_si256((const __m256i *)((const char *)packed_B + offset + TILE_N * sizeof(ggml_half))));
 
         for (int m = 0; m < nr; ++m) {
-            const __m512 vd1 = _mm512_set1_ps(GGML_FP16_TO_FP32(A[m * lda].d));
-            const __m512 vs1 = _mm512_set1_ps(GGML_FP16_TO_FP32(A[m * lda].s));
+            const __m512 vd1 = _mm512_set1_ps(GGML_CPU_FP16_TO_FP32(A[m * lda].d));
+            const __m512 vs1 = _mm512_set1_ps(GGML_CPU_FP16_TO_FP32(A[m * lda].s));
             const __m512 vtile = _mm512_cvtepi32_ps(_mm512_loadu_si512(tile + m * TILE_N));
 
             __m512 vsum;
@@ -1138,7 +1138,7 @@ struct acc_C<block_q8_0, block_q8_0, is_acc> {
         const __m512 vd0 = _mm512_cvtph_ps(_mm256_loadu_si256((const __m256i *)((const char *)packed_B + offset)));
 
         for (int m = 0; m < nr; ++m) {
-            const __m512 vd1 = _mm512_set1_ps(GGML_FP16_TO_FP32(A[m * lda].d));
+            const __m512 vd1 = _mm512_set1_ps(GGML_CPU_FP16_TO_FP32(A[m * lda].d));
             const __m512 vtile = _mm512_cvtepi32_ps(_mm512_loadu_si512(tile + m * TILE_N));
 
             __m512 vsum;
@@ -1438,7 +1438,7 @@ struct tinygemm_kernel_vnni<block_q8_0, block_q4_0, float, BLOCK_M, BLOCK_N, BLO
                 va[k] = _mm512_set1_epi32(a_ptr[k]);
                 vcomp = _mm512_dpbusd_epi32(vcomp, off, va[k]);
             }
-            vd1 = _mm512_set1_ps(GGML_FP16_TO_FP32(A[0 * KB + i].d));
+            vd1 = _mm512_set1_ps(GGML_CPU_FP16_TO_FP32(A[0 * KB + i].d));
         }
 
         // load b
@@ -1499,8 +1499,8 @@ struct tinygemm_kernel_vnni<block_q8_1, block_q4_1, float, 1, BLOCK_N, BLOCK_K>
             for (int k = 0; k < 8; ++k) {
                 va[k] = _mm512_set1_epi32(a_ptr[k]);
             }
-            vd1 = _mm512_set1_ps(GGML_FP16_TO_FP32(A[0 * KB + i].d));
-            vs1 = _mm512_set1_ps(GGML_FP16_TO_FP32(A[0 * KB + i].s));
+            vd1 = _mm512_set1_ps(GGML_CPU_FP16_TO_FP32(A[0 * KB + i].d));
+            vs1 = _mm512_set1_ps(GGML_CPU_FP16_TO_FP32(A[0 * KB + i].s));
         }
 
         // load b
@@ -1572,7 +1572,7 @@ struct tinygemm_kernel_vnni<block_q8_0, block_q8_0, float, BLOCK_M, BLOCK_N, BLO
                 va[k] = _mm512_set1_epi32(a_ptr[k]);
                 va[k] = _mm512_add_epi8(va[k], off);
             }
-            vd1 = _mm512_set1_ps(GGML_FP16_TO_FP32(A[0 * KB + i].d));
+            vd1 = _mm512_set1_ps(GGML_CPU_FP16_TO_FP32(A[0 * KB + i].d));
         }
 
         // load b
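
The hunks above all touch the same block-quantization pattern: find the block's absolute maximum, derive a scale, store the scale in half precision, then quantize with the reciprocal. Below is a scalar sketch of that flow; cpu_fp32_to_fp16() is a hypothetical stand-in for GGML_CPU_FP32_TO_FP16, assuming a compiler with native _Float16 support (recent GCC/Clang):

```c
#include <math.h>
#include <stdint.h>

// Hypothetical stand-in for GGML_CPU_FP32_TO_FP16 (assumes native _Float16).
static inline _Float16 cpu_fp32_to_fp16(float f) { return (_Float16) f; }

// Quantize one block of 32 floats to int8 with a shared fp16 scale,
// mirroring the scalar logic of the AVX-512 kernel above.
static void quantize_block_q8(const float * x, int8_t * q, _Float16 * d) {
    float amax = 0.0f;                      // absolute max of the block
    for (int i = 0; i < 32; ++i) {
        amax = fmaxf(amax, fabsf(x[i]));
    }
    const float iscale = 127.f / amax;
    *d = cpu_fp32_to_fp16(1 / iscale);      // scale stored in half precision
    const float id = (amax != 0.0f) ? iscale : 0.f;
    for (int i = 0; i < 32; ++i) {
        q[i] = (int8_t) roundf(x[i] * id);  // the kernel broadcasts id with _mm512_set1_ps
    }
}
```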

ggml/src/ggml-cpu/arch/arm/quants.c (108 additions, 108 deletions)

Large diffs are not rendered by default.

ggml/src/ggml-cpu/arch/arm/repack.cpp (12 additions, 12 deletions)

@@ -52,7 +52,7 @@ void ggml_quantize_mat_q8_0_4x4(const float * GGML_RESTRICT x, void * GGML_RESTR
             const float d = amax / ((1 << 7) - 1);
             id[row_iter] = d ? 1.0f / d : 0.0f;
 
-            y[i].d[row_iter] = GGML_FP32_TO_FP16(d);
+            y[i].d[row_iter] = GGML_CPU_FP32_TO_FP16(d);
         }
 
         for (int j = 0; j < 8; j++) {
@@ -103,7 +103,7 @@ void ggml_quantize_mat_q8_0_4x4(const float * GGML_RESTRICT x, void * GGML_RESTR
             const float d = amax / ((1 << 7) - 1);
             id[row_iter] = d ? 1.0f / d : 0.0f;
 
-            y[i].d[row_iter] = GGML_FP32_TO_FP16(d);
+            y[i].d[row_iter] = GGML_CPU_FP32_TO_FP16(d);
         }
 
         for (int j = 0; j < QK8_0 * 4; j++) {
@@ -146,7 +146,7 @@ void ggml_quantize_mat_q8_0_4x8(const float * GGML_RESTRICT x, void * GGML_RESTR
             const float d = amax / ((1 << 7) - 1);
             id[row_iter] = d ? 1.0f / d : 0.0f;
 
-            y[i].d[row_iter] = GGML_FP32_TO_FP16(d);
+            y[i].d[row_iter] = GGML_CPU_FP32_TO_FP16(d);
         }
 
         for (int j = 0; j < 4; j++) {
@@ -222,7 +222,7 @@ void ggml_quantize_mat_q8_0_4x8(const float * GGML_RESTRICT x, void * GGML_RESTR
             const float d = amax / ((1 << 7) - 1);
             id[row_iter] = d ? 1.0f / d : 0.0f;
 
-            y[i].d[row_iter] = GGML_FP32_TO_FP16(d);
+            y[i].d[row_iter] = GGML_CPU_FP32_TO_FP16(d);
         }
 
         for (int j = 0; j < QK8_0 * 4; j++) {
@@ -312,7 +312,7 @@ void ggml_gemv_q4_0_4x4_q8_0(int n, float * GGML_RESTRICT s, size_t bs, const vo
                     const int v1 = (int8_t) (b_ptr[l].qs[k * ncols_interleaved * blocklen + j * blocklen + i] & 0xF0);
                     sumi += ((v0 * a_ptr[l].qs[k * blocklen + i]) + (v1 * a_ptr[l].qs[k * blocklen + i + qk / 2])) >> 4;
                 }
-                sumf[j] += sumi * GGML_FP16_TO_FP32(b_ptr[l].d[j]) * GGML_FP16_TO_FP32(a_ptr[l].d);
+                sumf[j] += sumi * GGML_CPU_FP16_TO_FP32(b_ptr[l].d[j]) * GGML_CPU_FP16_TO_FP32(a_ptr[l].d);
             }
         }
     }
@@ -400,7 +400,7 @@ void ggml_gemv_q4_0_4x8_q8_0(int n, float * GGML_RESTRICT s, size_t bs, const vo
                     const int v1 = (int8_t) (b_ptr[l].qs[k * ncols_interleaved * blocklen + j * blocklen + i] & 0xF0);
                     sumi += ((v0 * a_ptr[l].qs[k * blocklen + i]) + (v1 * a_ptr[l].qs[k * blocklen + i + qk / 2])) >> 4;
                 }
-                sumf[j] += sumi * GGML_FP16_TO_FP32(b_ptr[l].d[j]) * GGML_FP16_TO_FP32(a_ptr[l].d);
+                sumf[j] += sumi * GGML_CPU_FP16_TO_FP32(b_ptr[l].d[j]) * GGML_CPU_FP16_TO_FP32(a_ptr[l].d);
             }
         }
     }
@@ -515,7 +515,7 @@ void ggml_gemv_q4_0_8x8_q8_0(int n, float * GGML_RESTRICT s, size_t bs, const vo
                     const int v1 = (int8_t) (b_ptr[l].qs[k * ncols_interleaved * blocklen + j * blocklen + i] & 0xF0);
                     sumi += ((v0 * a_ptr[l].qs[k * blocklen + i]) + (v1 * a_ptr[l].qs[k * blocklen + i + qk / 2])) >> 4;
                 }
-                sumf[j] += sumi * GGML_FP16_TO_FP32(b_ptr[l].d[j]) * GGML_FP16_TO_FP32(a_ptr[l].d);
+                sumf[j] += sumi * GGML_CPU_FP16_TO_FP32(b_ptr[l].d[j]) * GGML_CPU_FP16_TO_FP32(a_ptr[l].d);
             }
         }
     }
@@ -609,7 +609,7 @@ void ggml_gemv_iq4_nl_4x4_q8_0(int n, float * GGML_RESTRICT s, size_t bs, const
                     const int v1 = kvalues_iq4nl[b_ptr[l].qs[k * ncols_interleaved * blocklen + j * blocklen + i] >> 4];
                     sumi += ((v0 * a_ptr[l].qs[k * blocklen + i]) + (v1 * a_ptr[l].qs[k * blocklen + i + qk / 2]));
                 }
-                sumf[j] += sumi * GGML_FP16_TO_FP32(b_ptr[l].d[j]) * GGML_FP16_TO_FP32(a_ptr[l].d);
+                sumf[j] += sumi * GGML_CPU_FP16_TO_FP32(b_ptr[l].d[j]) * GGML_CPU_FP16_TO_FP32(a_ptr[l].d);
             }
         }
     }
@@ -1118,7 +1118,7 @@ void ggml_gemm_q4_0_4x4_q8_0(int n, float * GGML_RESTRICT s, size_t bs, const vo
                         sumi += ((v0 * a_ptr[l].qs[k * 4 * blocklen + m * blocklen + i]) +
                                  (v1 * a_ptr[l].qs[k * 4 * blocklen + m * blocklen + i + qk / 2 * 4])) >> 4;
                     }
-                    sumf[m][j] += sumi * GGML_FP16_TO_FP32(b_ptr[l].d[j]) * GGML_FP16_TO_FP32(a_ptr[l].d[m]);
+                    sumf[m][j] += sumi * GGML_CPU_FP16_TO_FP32(b_ptr[l].d[j]) * GGML_CPU_FP16_TO_FP32(a_ptr[l].d[m]);
                 }
             }
         }
@@ -1571,7 +1571,7 @@ void ggml_gemm_q4_0_4x8_q8_0(int n, float * GGML_RESTRICT s, size_t bs, const vo
                         sumi += ((v0 * a_ptr[l].qs[k * 4 * blocklen + m * blocklen + i]) +
                                  (v1 * a_ptr[l].qs[k * 4 * blocklen + m * blocklen + i + qk / 2 * 4])) >> 4;
                     }
-                    sumf[m][j] += sumi * GGML_FP16_TO_FP32(b_ptr[l].d[j]) * GGML_FP16_TO_FP32(a_ptr[l].d[m]);
+                    sumf[m][j] += sumi * GGML_CPU_FP16_TO_FP32(b_ptr[l].d[j]) * GGML_CPU_FP16_TO_FP32(a_ptr[l].d[m]);
                 }
             }
         }
@@ -2040,7 +2040,7 @@ void ggml_gemm_q4_0_8x8_q8_0(int n, float * GGML_RESTRICT s, size_t bs, const vo
                         sumi += ((v0 * a_ptr[l].qs[k * 4 * blocklen + m * blocklen + i]) +
                                  (v1 * a_ptr[l].qs[k * 4 * blocklen + m * blocklen + i + qk / 2 * 4])) >> 4;
                     }
-                    sumf[m][j] += sumi * GGML_FP16_TO_FP32(b_ptr[l].d[j]) * GGML_FP16_TO_FP32(a_ptr[l].d[m]);
+                    sumf[m][j] += sumi * GGML_CPU_FP16_TO_FP32(b_ptr[l].d[j]) * GGML_CPU_FP16_TO_FP32(a_ptr[l].d[m]);
                 }
             }
         }
@@ -2148,7 +2148,7 @@ void ggml_gemm_iq4_nl_4x4_q8_0(int n, float * GGML_RESTRICT s, size_t bs, const
                         sumi += ((v0 * a_ptr[l].qs[k * 4 * blocklen + m * blocklen + i]) +
                                  (v1 * a_ptr[l].qs[k * 4 * blocklen + m * blocklen + i + qk / 2 * 4]));
                     }
-                    sumf[m][j] += sumi * GGML_FP16_TO_FP32(b_ptr[l].d[j]) * GGML_FP16_TO_FP32(a_ptr[l].d[m]);
+                    sumf[m][j] += sumi * GGML_CPU_FP16_TO_FP32(b_ptr[l].d[j]) * GGML_CPU_FP16_TO_FP32(a_ptr[l].d[m]);
                 }
            }
        }
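
Every gemv/gemm hunk in this file changes the same expression: the integer dot product sumi is rescaled by the fp16 scales of both blocks before accumulation. A scalar sketch of that step follows; cpu_fp16_to_fp32() is again a hypothetical stand-in for GGML_CPU_FP16_TO_FP32, assuming native _Float16:

```c
#include <stdint.h>

// Hypothetical stand-in for GGML_CPU_FP16_TO_FP32 (assumes native _Float16).
static inline float cpu_fp16_to_fp32(_Float16 h) { return (float) h; }

// Dot product of two quantized blocks: qa/qb hold the int8 quants,
// da/db the blocks' fp16 scales, n the block size.
static float block_dot(const int8_t * qa, _Float16 da,
                       const int8_t * qb, _Float16 db, int n) {
    int sumi = 0;
    for (int i = 0; i < n; ++i) {
        sumi += (int) qa[i] * (int) qb[i];
    }
    // mirrors: sumf[j] += sumi * GGML_CPU_FP16_TO_FP32(b_ptr[l].d[j])
    //                          * GGML_CPU_FP16_TO_FP32(a_ptr[l].d);
    return (float) sumi * cpu_fp16_to_fp32(da) * cpu_fp16_to_fp32(db);
}
```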
