Skip to content

Commit dddf377

Browse files
committed
use reference quantization fns in AMX until moved to CPU backend
ggml-ci
1 parent: 5cfaecd · commit: dddf377

File tree

3 files changed: 12 additions and 7 deletions

ggml/src/ggml-amx/ggml-amx.cpp

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -433,4 +433,8 @@ void ggml_backend_amx_set_n_threads(ggml_backend_t backend_amx, int n_threads) {
433433
GGML_UNUSED(n_threads);
434434
}
435435

436+
ggml_backend_reg_t ggml_backend_amx_reg(void) {
437+
return nullptr;
438+
}
439+
436440
#endif

ggml/src/ggml-amx/mmq.cpp

Lines changed: 4 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -496,19 +496,20 @@ inline void from_float(const float * x, char * vy, int64_t k);
496496

497497
template <>
498498
inline void from_float<block_q8_0>(const float * x, char * vy, int64_t k) {
499-
quantize_row_q8_0(x, vy, k);
499+
// FIXME: using unoptimized reference impl until moved to CPU backend
500+
quantize_row_q8_0_ref(x, (block_q8_0 *)vy, k);
500501
}
501502

502503
template <>
503504
inline void from_float<block_q8_1>(const float * x, char * vy, int64_t k) {
504-
quantize_row_q8_1(x, vy, k);
505+
quantize_row_q8_1_ref(x, (block_q8_1 *)vy, k);
505506
}
506507

507508
template <>
508509
inline void from_float<block_q8_K>(const float * x, char * vy, int64_t k) {
509510
#if 1
510511
// TODO: this is reference impl!
511-
quantize_row_q8_K(x, vy, k);
512+
quantize_row_q8_K_ref(x, (block_q8_K *)vy, k);
512513
#else
513514
quantize_row_q8_K_vnni(x, vy, k);
514515
#endif

ggml/src/ggml-backend-reg.cpp

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -31,10 +31,6 @@
3131
#include "ggml-rpc.h"
3232
#endif
3333

34-
#ifndef __AMX_INT8__
35-
#undef GGML_USE_AMX
36-
#endif
37-
3834
#ifdef GGML_USE_AMX
3935
# include "ggml-amx.h"
4036
#endif
@@ -84,6 +80,10 @@ struct ggml_backend_registry {
8480
}
8581

8682
void register_backend(ggml_backend_reg_t reg) {
83+
if (!reg) {
84+
return;
85+
}
86+
8787
#ifndef NDEBUG
8888
GGML_LOG_DEBUG("%s: registered backend %s (%zu devices)\n",
8989
__func__, ggml_backend_reg_name(reg), ggml_backend_reg_dev_count(reg));

0 commit comments

Comments
 (0)