Skip to content

Commit 990775d

Browse files
committed
use reference quantization fns in AMX until moved to CPU backend
ggml-ci
1 parent: 5cfaecd; commit: 990775d

File tree

1 file changed

+4
-3
lines changed

1 file changed

+4
-3
lines changed

ggml/src/ggml-amx/mmq.cpp

Lines changed: 4 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -496,19 +496,20 @@ inline void from_float(const float * x, char * vy, int64_t k);
496496

497497
template <>
498498
inline void from_float<block_q8_0>(const float * x, char * vy, int64_t k) {
499-
quantize_row_q8_0(x, vy, k);
499+
// FIXME: using unoptimized reference impl until moved to CPU backend
500+
quantize_row_q8_0_ref(x, (block_q8_0 *)vy, k);
500501
}
501502

502503
template <>
503504
inline void from_float<block_q8_1>(const float * x, char * vy, int64_t k) {
504-
quantize_row_q8_1(x, vy, k);
505+
quantize_row_q8_1_ref(x, (block_q8_1 *)vy, k);
505506
}
506507

507508
template <>
508509
inline void from_float<block_q8_K>(const float * x, char * vy, int64_t k) {
509510
#if 1
510511
// TODO: this is reference impl!
511-
quantize_row_q8_K(x, vy, k);
512+
quantize_row_q8_K_ref(x, (block_q8_K *)vy, k);
512513
#else
513514
quantize_row_q8_K_vnni(x, vy, k);
514515
#endif

0 commit comments

Comments
 (0)