
Commit e876c2b

Revert "ggml-cpu: enable IBM NNPA Vector Intrinsics (ggml-org#14317)"
1 parent 1e65120 commit e876c2b

24 files changed: +833 -944 lines changed


ggml/include/ggml-cpu.h

Lines changed: 0 additions & 1 deletion
@@ -101,7 +101,6 @@ extern "C" {
     GGML_BACKEND_API int ggml_cpu_has_riscv_v   (void);
     GGML_BACKEND_API int ggml_cpu_has_vsx       (void);
     GGML_BACKEND_API int ggml_cpu_has_vxe       (void);
-    GGML_BACKEND_API int ggml_cpu_has_nnpa      (void);
     GGML_BACKEND_API int ggml_cpu_has_wasm_simd (void);
     GGML_BACKEND_API int ggml_cpu_has_llamafile (void);
 
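A hedged aside, not part of this commit: a minimal sketch of how these per-feature getters are typically queried at runtime, assuming only the declarations visible in the hunk above (each returns an int treated as a 0/1 capability flag).

    #include <stdio.h>
    #include "ggml-cpu.h"

    int main(void) {
        // each getter reports a runtime CPU capability as 0 or 1
        printf("riscv_v: %d\n", ggml_cpu_has_riscv_v());
        printf("vsx:     %d\n", ggml_cpu_has_vsx());
        printf("vxe:     %d\n", ggml_cpu_has_vxe());
        // ggml_cpu_has_nnpa() is removed by this revert and must not be called
        return 0;
    }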

ggml/src/ggml-cpu/arch/arm/quants.c

Lines changed: 108 additions & 109 deletions
Large diffs are not rendered by default.

ggml/src/ggml-cpu/arch/arm/repack.cpp

Lines changed: 12 additions & 13 deletions
@@ -6,7 +6,6 @@
 #include "ggml-impl.h"
 #include "ggml-cpu.h"
 #include "ggml-cpu-impl.h"
-#include "simd-mappings.h"
 #include "traits.h"
 
 #include <cmath>
@@ -52,7 +51,7 @@ void ggml_quantize_mat_q8_0_4x4(const float * GGML_RESTRICT x, void * GGML_RESTR
         const float d = amax / ((1 << 7) - 1);
         id[row_iter] = d ? 1.0f / d : 0.0f;
 
-        y[i].d[row_iter] = GGML_CPU_FP32_TO_FP16(d);
+        y[i].d[row_iter] = GGML_FP32_TO_FP16(d);
     }
 
     for (int j = 0; j < 8; j++) {
@@ -103,7 +102,7 @@ void ggml_quantize_mat_q8_0_4x4(const float * GGML_RESTRICT x, void * GGML_RESTR
         const float d = amax / ((1 << 7) - 1);
         id[row_iter] = d ? 1.0f / d : 0.0f;
 
-        y[i].d[row_iter] = GGML_CPU_FP32_TO_FP16(d);
+        y[i].d[row_iter] = GGML_FP32_TO_FP16(d);
     }
 
     for (int j = 0; j < QK8_0 * 4; j++) {
@@ -146,7 +145,7 @@ void ggml_quantize_mat_q8_0_4x8(const float * GGML_RESTRICT x, void * GGML_RESTR
         const float d = amax / ((1 << 7) - 1);
         id[row_iter] = d ? 1.0f / d : 0.0f;
 
-        y[i].d[row_iter] = GGML_CPU_FP32_TO_FP16(d);
+        y[i].d[row_iter] = GGML_FP32_TO_FP16(d);
     }
 
     for (int j = 0; j < 4; j++) {
@@ -222,7 +221,7 @@ void ggml_quantize_mat_q8_0_4x8(const float * GGML_RESTRICT x, void * GGML_RESTR
         const float d = amax / ((1 << 7) - 1);
         id[row_iter] = d ? 1.0f / d : 0.0f;
 
-        y[i].d[row_iter] = GGML_CPU_FP32_TO_FP16(d);
+        y[i].d[row_iter] = GGML_FP32_TO_FP16(d);
     }
 
     for (int j = 0; j < QK8_0 * 4; j++) {
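All four hunks above rewrite the same Q8_0 scale computation, so a standalone sketch may help. This is a hedged reading aid, not repack.cpp code; the names quantize_block_q8_sketch, x, qs, and d_out are made up. The block scale d maps the largest magnitude in the block onto the int8 maximum of 127, and its guarded reciprocal id does the actual quantization.

    #include <math.h>
    #include <stdint.h>
    #include <stdio.h>

    // quantize one block of n floats to int8 plus a single float scale
    static void quantize_block_q8_sketch(const float *x, int n, int8_t *qs, float *d_out) {
        float amax = 0.0f;                       // largest magnitude in the block
        for (int i = 0; i < n; i++) {
            const float ax = fabsf(x[i]);
            if (ax > amax) amax = ax;
        }
        const float d  = amax / ((1 << 7) - 1);  // scale so amax maps to 127
        const float id = d ? 1.0f / d : 0.0f;    // guard against an all-zero block
        for (int i = 0; i < n; i++) {
            qs[i] = (int8_t) roundf(x[i] * id);
        }
        *d_out = d;                              // the real code stores this as FP16
    }

    int main(void) {
        const float x[4] = { 0.5f, -1.0f, 0.25f, 2.0f };
        int8_t qs[4];
        float d;
        quantize_block_q8_sketch(x, 4, qs, &d);
        printf("d=%f qs=[%d %d %d %d]\n", d, qs[0], qs[1], qs[2], qs[3]);
        return 0;
    }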
@@ -312,7 +311,7 @@ void ggml_gemv_q4_0_4x4_q8_0(int n, float * GGML_RESTRICT s, size_t bs, const vo
                     const int v1 = (int8_t) (b_ptr[l].qs[k * ncols_interleaved * blocklen + j * blocklen + i] & 0xF0);
                     sumi += ((v0 * a_ptr[l].qs[k * blocklen + i]) + (v1 * a_ptr[l].qs[k * blocklen + i + qk / 2])) >> 4;
                 }
-                sumf[j] += sumi * GGML_CPU_FP16_TO_FP32(b_ptr[l].d[j]) * GGML_CPU_FP16_TO_FP32(a_ptr[l].d);
+                sumf[j] += sumi * GGML_FP16_TO_FP32(b_ptr[l].d[j]) * GGML_FP16_TO_FP32(a_ptr[l].d);
             }
         }
     }
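The scalar loop above relies on a packing trick that is easy to miss: each q4_0 byte holds two 4-bit weights, and both are extracted into the high bits of an int8 (so each carries a spare factor of 16) to get sign extension for free; the single >> 4 after summing divides that common factor back out. A hedged illustration with made-up inputs, not the repack.cpp loop itself:

    #include <stdint.h>
    #include <stdio.h>

    int main(void) {
        const uint8_t packed = 0x9C;              // high nibble 0x9, low nibble 0xC
        const int v0 = (int8_t) (packed << 4);    // low nibble in bits 4..7: 0xC0 -> -64
        const int v1 = (int8_t) (packed & 0xF0);  // high nibble in bits 4..7: 0x90 -> -112
        const int8_t a0 = 3, a1 = -5;             // example int8 activations
        const int sumi = ((v0 * a0) + (v1 * a1)) >> 4; // strip the common x16 factor
        printf("v0=%d v1=%d sumi=%d\n", v0, v1, sumi); // v0=-64 v1=-112 sumi=23
        return 0;
    }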
@@ -400,7 +399,7 @@ void ggml_gemv_q4_0_4x8_q8_0(int n, float * GGML_RESTRICT s, size_t bs, const vo
                     const int v1 = (int8_t) (b_ptr[l].qs[k * ncols_interleaved * blocklen + j * blocklen + i] & 0xF0);
                     sumi += ((v0 * a_ptr[l].qs[k * blocklen + i]) + (v1 * a_ptr[l].qs[k * blocklen + i + qk / 2])) >> 4;
                 }
-                sumf[j] += sumi * GGML_CPU_FP16_TO_FP32(b_ptr[l].d[j]) * GGML_CPU_FP16_TO_FP32(a_ptr[l].d);
+                sumf[j] += sumi * GGML_FP16_TO_FP32(b_ptr[l].d[j]) * GGML_FP16_TO_FP32(a_ptr[l].d);
             }
         }
     }
@@ -515,7 +514,7 @@ void ggml_gemv_q4_0_8x8_q8_0(int n, float * GGML_RESTRICT s, size_t bs, const vo
                     const int v1 = (int8_t) (b_ptr[l].qs[k * ncols_interleaved * blocklen + j * blocklen + i] & 0xF0);
                     sumi += ((v0 * a_ptr[l].qs[k * blocklen + i]) + (v1 * a_ptr[l].qs[k * blocklen + i + qk / 2])) >> 4;
                 }
-                sumf[j] += sumi * GGML_CPU_FP16_TO_FP32(b_ptr[l].d[j]) * GGML_CPU_FP16_TO_FP32(a_ptr[l].d);
+                sumf[j] += sumi * GGML_FP16_TO_FP32(b_ptr[l].d[j]) * GGML_FP16_TO_FP32(a_ptr[l].d);
             }
         }
     }
@@ -609,7 +608,7 @@ void ggml_gemv_iq4_nl_4x4_q8_0(int n, float * GGML_RESTRICT s, size_t bs, const
                     const int v1 = kvalues_iq4nl[b_ptr[l].qs[k * ncols_interleaved * blocklen + j * blocklen + i] >> 4];
                     sumi += ((v0 * a_ptr[l].qs[k * blocklen + i]) + (v1 * a_ptr[l].qs[k * blocklen + i + qk / 2]));
                 }
-                sumf[j] += sumi * GGML_CPU_FP16_TO_FP32(b_ptr[l].d[j]) * GGML_CPU_FP16_TO_FP32(a_ptr[l].d);
+                sumf[j] += sumi * GGML_FP16_TO_FP32(b_ptr[l].d[j]) * GGML_FP16_TO_FP32(a_ptr[l].d);
             }
         }
     }
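The iq4_nl hunk just above differs from the q4_0 ones: each 4-bit code is an index into a small non-linear codebook rather than a value to shift, which is why its sumi has no trailing >> 4. A hedged sketch of the lookup; the table values are copied from ggml's kvalues_iq4nl as of this tree and should be treated as illustrative:

    #include <stdint.h>
    #include <stdio.h>

    static const int8_t kvalues_iq4nl[16] = {
        -127, -104, -83, -65, -49, -35, -22, -10, 1, 13, 25, 38, 53, 69, 89, 113,
    };

    int main(void) {
        const uint8_t packed = 0x4B;                  // two 4-bit codebook indices
        const int v0 = kvalues_iq4nl[packed & 0x0F];  // low index 0xB -> 38
        const int v1 = kvalues_iq4nl[packed >> 4];    // high index 0x4 -> -49
        printf("v0=%d v1=%d\n", v0, v1);
        return 0;
    }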
@@ -1118,7 +1117,7 @@ void ggml_gemm_q4_0_4x4_q8_0(int n, float * GGML_RESTRICT s, size_t bs, const vo
                         sumi += ((v0 * a_ptr[l].qs[k * 4 * blocklen + m * blocklen + i]) +
                                  (v1 * a_ptr[l].qs[k * 4 * blocklen + m * blocklen + i + qk / 2 * 4])) >> 4;
                     }
-                    sumf[m][j] += sumi * GGML_CPU_FP16_TO_FP32(b_ptr[l].d[j]) * GGML_CPU_FP16_TO_FP32(a_ptr[l].d[m]);
+                    sumf[m][j] += sumi * GGML_FP16_TO_FP32(b_ptr[l].d[j]) * GGML_FP16_TO_FP32(a_ptr[l].d[m]);
                 }
             }
         }
@@ -1571,7 +1570,7 @@ void ggml_gemm_q4_0_4x8_q8_0(int n, float * GGML_RESTRICT s, size_t bs, const vo
                         sumi += ((v0 * a_ptr[l].qs[k * 4 * blocklen + m * blocklen + i]) +
                                  (v1 * a_ptr[l].qs[k * 4 * blocklen + m * blocklen + i + qk / 2 * 4])) >> 4;
                     }
-                    sumf[m][j] += sumi * GGML_CPU_FP16_TO_FP32(b_ptr[l].d[j]) * GGML_CPU_FP16_TO_FP32(a_ptr[l].d[m]);
+                    sumf[m][j] += sumi * GGML_FP16_TO_FP32(b_ptr[l].d[j]) * GGML_FP16_TO_FP32(a_ptr[l].d[m]);
                 }
             }
         }
@@ -2040,7 +2039,7 @@ void ggml_gemm_q4_0_8x8_q8_0(int n, float * GGML_RESTRICT s, size_t bs, const vo
                         sumi += ((v0 * a_ptr[l].qs[k * 4 * blocklen + m * blocklen + i]) +
                                  (v1 * a_ptr[l].qs[k * 4 * blocklen + m * blocklen + i + qk / 2 * 4])) >> 4;
                     }
-                    sumf[m][j] += sumi * GGML_CPU_FP16_TO_FP32(b_ptr[l].d[j]) * GGML_CPU_FP16_TO_FP32(a_ptr[l].d[m]);
+                    sumf[m][j] += sumi * GGML_FP16_TO_FP32(b_ptr[l].d[j]) * GGML_FP16_TO_FP32(a_ptr[l].d[m]);
                 }
             }
         }
@@ -2148,7 +2147,7 @@ void ggml_gemm_iq4_nl_4x4_q8_0(int n, float * GGML_RESTRICT s, size_t bs, const
                         sumi += ((v0 * a_ptr[l].qs[k * 4 * blocklen + m * blocklen + i]) +
                                  (v1 * a_ptr[l].qs[k * 4 * blocklen + m * blocklen + i + qk / 2 * 4]));
                     }
-                    sumf[m][j] += sumi * GGML_CPU_FP16_TO_FP32(b_ptr[l].d[j]) * GGML_CPU_FP16_TO_FP32(a_ptr[l].d[m]);
+                    sumf[m][j] += sumi * GGML_FP16_TO_FP32(b_ptr[l].d[j]) * GGML_FP16_TO_FP32(a_ptr[l].d[m]);
                 }
             }
         }
