Skip to content

Commit 58ab353

Browse files
committed
[WIP] Support Q4_0.
1 parent 14bc9eb commit 58ab353

File tree

1 file changed

+19
-15
lines changed

1 file changed

+19
-15
lines changed

ggml/src/ggml-cpu/tmac/lut_mul_mat.cpp

Lines changed: 19 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -500,22 +500,26 @@ size_t ggml_backend_tmac_desired_wsize(const struct ggml_tensor * dst) {
500500
}
501501

502502
size_t ggml_tmac_get_nbytes(const struct ggml_tensor * tensor) {
503-
const int bits = get_type_bits(tensor->type);
503+
if (is_tmac_type(tensor->type)) {
504+
const int bits = get_type_bits(tensor->type);
504505

505-
int k = tensor->ne[0];
506-
int m = tensor->ne[1]; // `n` in llama.cpp
506+
int k = tensor->ne[0];
507+
int m = tensor->ne[1]; // `n` in llama.cpp
507508

508-
struct tmac_kernel_config * kernel_config = find_tmac_kernel_config(m, k, bits);
509-
if (kernel_config == nullptr) {
510-
ggml_tmac_tune_kernel_config(tensor, m, k);
511-
kernel_config = find_tmac_kernel_config(m, k, bits);
512-
}
509+
struct tmac_kernel_config * kernel_config = find_tmac_kernel_config(m, k, bits);
510+
if (kernel_config == nullptr) {
511+
ggml_tmac_tune_kernel_config(tensor, m, k);
512+
kernel_config = find_tmac_kernel_config(m, k, bits);
513+
}
513514

514-
const int scales_size = ggml_tmac_get_scales_size(kernel_config, m, k);
515-
// Currently, always uses float to store scales or zero points
516-
size_t nbytes = k * m / 8 * bits + scales_size * sizeof(float);
517-
nbytes = GGML_PAD(nbytes, GGUF_DEFAULT_ALIGNMENT);
518-
return nbytes;
515+
const int scales_size = ggml_tmac_get_scales_size(kernel_config, m, k);
516+
// Currently, always uses float to store scales or zero points
517+
size_t nbytes = k * m / 8 * bits + scales_size * sizeof(float);
518+
nbytes = GGML_PAD(nbytes, GGUF_DEFAULT_ALIGNMENT);
519+
return nbytes;
520+
} else {
521+
return ggml_nbytes(tensor);
522+
}
519523
}
520524

521525

@@ -1030,15 +1034,15 @@ void ggml_backend_tmac_convert_weight(struct ggml_tensor * tensor, const void *
10301034
// m = batch_size
10311035
// n = output_dim
10321036
// t-mac llama.cpp n and m swapped
1033-
void ggml_tmac_mul_mat_task_init(void * src1, void * qlut, void * lut_scales, void * lut_biases, int n, int k, int m, int bits) {
1037+
static inline void ggml_tmac_mul_mat_task_init(void * src1, void * qlut, void * lut_scales, void * lut_biases, int n, int k, int m, int bits) {
10341038
struct tmac_kernel_config * kernel_config = find_tmac_kernel_config(n, k, bits);
10351039
if (kernel_config == nullptr) {
10361040
throw std::runtime_error("ggml_tmac_mul_mat_task_init: Failed to find kernel config for m" + std::to_string(n) + "_k" + std::to_string(k) + "_b" + std::to_string(bits));
10371041
}
10381042
lut_ctor_int8_g4(src1, lut_scales, lut_biases, qlut, k, kernel_config);
10391043
}
10401044

1041-
void ggml_tmac_mul_mat_task_compute(void * src0, void * scales, void * qlut, void * lut_scales, void * lut_biases, void * dst, int n, int k, int m, int bits) {
1045+
static inline void ggml_tmac_mul_mat_task_compute(void * src0, void * scales, void * qlut, void * lut_scales, void * lut_biases, void * dst, int n, int k, int m, int bits) {
10421046
struct tmac_kernel_config * kernel_config = find_tmac_kernel_config(n, k, bits);
10431047
if (kernel_config == nullptr) {
10441048
GGML_LOG_INFO("Failed to find kernel config for m%d_k%d_b%d\n", n, k, bits);

0 commit comments

Comments (0)