Skip to content

Commit d99c2b1

Browse files
committed
Revert "ggml-cpu: add mxfp4 VSX intrinsics for Power9+ (ppc64le) hardware (ggml-org#15385)"
This reverts commit 6424594.
1 parent 3e7da26 commit d99c2b1

File tree

2 files changed

+1
-66
lines changed

2 files changed

+1
-66
lines changed

ggml/src/ggml-cpu/arch-fallback.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -73,6 +73,7 @@
7373
#define ggml_vec_dot_tq1_0_q8_K_generic ggml_vec_dot_tq1_0_q8_K
7474
#define ggml_vec_dot_tq2_0_q8_K_generic ggml_vec_dot_tq2_0_q8_K
7575
#define ggml_vec_dot_iq1_m_q8_K_generic ggml_vec_dot_iq1_m_q8_K
76+
#define ggml_vec_dot_mxfp4_q8_0_generic ggml_vec_dot_mxfp4_q8_0
7677
// repack.cpp
7778
#define ggml_quantize_mat_q8_0_4x4_generic ggml_quantize_mat_q8_0_4x4
7879
#define ggml_quantize_mat_q8_0_4x8_generic ggml_quantize_mat_q8_0_4x8

ggml/src/ggml-cpu/arch/powerpc/quants.c

Lines changed: 0 additions & 66 deletions
Original file line numberDiff line numberDiff line change
@@ -278,72 +278,6 @@ void ggml_vec_dot_q4_1_q8_1(int n, float * GGML_RESTRICT s, size_t bs, const voi
278278
#endif
279279
}
280280

281-
void ggml_vec_dot_mxfp4_q8_0(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, size_t bx, const void * GGML_RESTRICT vy, size_t by, int nrc) {
282-
assert(nrc == 1);
283-
UNUSED(nrc);
284-
UNUSED(bx);
285-
UNUSED(by);
286-
UNUSED(bs);
287-
assert(n % QK_MXFP4 == 0);
288-
static_assert(QK_MXFP4 == QK8_0, "QK_MXFP4 and QK8_0 must be the same");
289-
290-
const block_mxfp4 * GGML_RESTRICT x = vx;
291-
const block_q8_0 * GGML_RESTRICT y = vy;
292-
293-
const int nb = n / QK_MXFP4;
294-
295-
int ib = 0;
296-
float sumf = 0;
297-
298-
#if defined(__POWER9_VECTOR__)
299-
const vector signed char lowMask = vec_splats((signed char)0xF);
300-
const vector unsigned char vshift4 = vec_splats((unsigned char)4);
301-
vector float vsumf0 = vec_splats(0.0f);
302-
303-
vector signed char kv = vec_xl(0, (const signed char *)kvalues_mxfp4);
304-
305-
#pragma GCC unroll 8
306-
for (; ib < nb; ++ib) {
307-
__builtin_prefetch(x[ib].qs, 0, 1);
308-
__builtin_prefetch(y[ib].qs, 0, 1);
309-
310-
vector float vyd = vec_splats(GGML_CPU_FP16_TO_FP32(y[ib].d) *
311-
GGML_E8M0_TO_FP32_HALF(x[ib].e));
312-
313-
vector signed char q8y0 = vec_xl( 0, y[ib].qs);
314-
vector signed char q8y1 = vec_xl(16, y[ib].qs);
315-
316-
vector signed char qxs = (vector signed char)vec_xl(0, x[ib].qs);
317-
318-
vector unsigned char lo_nibbles = (vector unsigned char)vec_and(qxs, lowMask);
319-
vector unsigned char hi_nibbles = (vector unsigned char)vec_sr(qxs, vshift4);
320-
321-
vector signed char q4x0 = vec_perm(kv, kv, lo_nibbles);
322-
vector signed char q4x1 = vec_perm(kv, kv, hi_nibbles);
323-
324-
vector signed short qv0 = vec_add(vec_mule(q4x0, q8y0), vec_mulo(q4x0, q8y0));
325-
vector signed short qv1 = vec_add(vec_mule(q4x1, q8y1), vec_mulo(q4x1, q8y1));
326-
327-
vector signed int vsumi0 = vec_splats((int32_t)0);
328-
vsumi0 = vec_sum4s(qv0, vsumi0);
329-
vsumi0 = vec_sum4s(qv1, vsumi0);
330-
331-
vsumf0 = vec_madd(vec_ctf(vsumi0, 0), vyd, vsumf0);
332-
}
333-
334-
vsumf0 = vec_add(vsumf0, vec_sld(vsumf0, vsumf0, 4));
335-
vsumf0 = vec_add(vsumf0, vec_sld(vsumf0, vsumf0, 8));
336-
sumf = vec_extract(vsumf0, 0);
337-
*s = sumf;
338-
#else
339-
UNUSED(x);
340-
UNUSED(y);
341-
UNUSED(ib);
342-
UNUSED(sumf);
343-
ggml_vec_dot_mxfp4_q8_0_generic(n, s, bs, vx, bx, vy, by, nrc);
344-
#endif
345-
}
346-
347281
void ggml_vec_dot_q5_0_q8_0(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, size_t bx, const void * GGML_RESTRICT vy, size_t by, int nrc) {
348282
const int qk = QK8_0;
349283
const int nb = n / qk;

0 commit comments

Comments
 (0)