
Commit b8ee774

[WIP] Support Q4_0 and TQ types.
1 parent 9bbec5d commit b8ee774

2 files changed: +15 -30 lines

ggml/src/ggml-cpu/tmac/lut_mul_mat.cpp

Lines changed: 13 additions & 28 deletions
@@ -146,6 +146,11 @@ static inline int get_type_group_size(enum ggml_type type) {
         case GGML_TYPE_TMAC_W4G128_0:
         case GGML_TYPE_TMAC_W4G128_1:
             return 128;
+        case GGML_TYPE_Q4_0:
+            return 32;
+        case GGML_TYPE_TQ1_0:
+        case GGML_TYPE_TQ2_0:
+            return 256;
         default:
             return 0;
     }
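
The new group sizes match the block sizes of the corresponding ggml quant formats: a Q4_0 block packs 32 weights, while TQ1_0 and TQ2_0 use 256-weight (QK_K-sized) blocks. A minimal sanity check, assuming it is compiled into this translation unit so it can see the static helper and the GGML_TYPE_* values:

```cpp
#include <cassert>

// Sketch only: relies on get_type_group_size() and the ggml_type enum from this file.
static void check_new_group_sizes() {
    assert(get_type_group_size(GGML_TYPE_Q4_0)  == 32);   // one Q4_0 block = 32 weights
    assert(get_type_group_size(GGML_TYPE_TQ1_0) == 256);  // TQ1_0/TQ2_0 use 256-weight blocks
    assert(get_type_group_size(GGML_TYPE_TQ2_0) == 256);
}
```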
@@ -164,6 +169,10 @@ static inline bool get_type_has_zero_point(enum ggml_type type) {
         case GGML_TYPE_TMAC_W2G128_1:
         case GGML_TYPE_TMAC_W4G128_1:
             return true;
+        case GGML_TYPE_Q4_0:
+        case GGML_TYPE_TQ1_0:
+        case GGML_TYPE_TQ2_0:
+            return false;
         default:
             return false;
     }
@@ -178,29 +187,6 @@ static inline bool get_type_is_one_scale(enum ggml_type type) {
     }
 }

-static inline int ggml_tmac_get_type_bits(enum ggml_type type) {
-    switch (type) {
-        case GGML_TYPE_TMAC_BN_0:
-        case GGML_TYPE_TMAC_W2G64_0:
-        case GGML_TYPE_TMAC_W2G64_1:
-        case GGML_TYPE_TMAC_W2G128_0:
-        case GGML_TYPE_TMAC_W2G128_1:
-            return 2;
-        case GGML_TYPE_TMAC_W4G64_0:
-        case GGML_TYPE_TMAC_W4G64_1:
-        case GGML_TYPE_TMAC_W4G128_0:
-        case GGML_TYPE_TMAC_W4G128_1:
-            return 4;
-        case GGML_TYPE_Q4_0:
-            return 4;
-        case GGML_TYPE_TQ1_0:
-        case GGML_TYPE_TQ2_0:
-            return 2;
-        default:
-            return 0;
-    }
-}
-
 static inline int ggml_tmac_get_scales_size(const struct tmac_kernel_config * kernel_config, int m, int k) {
     int scales_size;
     if (kernel_config->one_scale) {
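
The per-type bit-width lookup is removed from this file; the call sites below now use get_type_bits, whose definition is not part of this diff. Presumably it preserves the mapping of the removed helper while also covering the standard ggml types. A hypothetical reconstruction of that mapping, for reference:

```cpp
// Hypothetical sketch: the real get_type_bits is defined elsewhere and is not
// shown in this commit; this only restates the mapping of the removed helper.
static inline int get_type_bits_sketch(enum ggml_type type) {
    switch (type) {
        case GGML_TYPE_TMAC_BN_0:
        case GGML_TYPE_TMAC_W2G64_0:
        case GGML_TYPE_TMAC_W2G64_1:
        case GGML_TYPE_TMAC_W2G128_0:
        case GGML_TYPE_TMAC_W2G128_1:
        case GGML_TYPE_TQ1_0:
        case GGML_TYPE_TQ2_0:
            return 2;   // 2-bit / ternary weights
        case GGML_TYPE_TMAC_W4G64_0:
        case GGML_TYPE_TMAC_W4G64_1:
        case GGML_TYPE_TMAC_W4G128_0:
        case GGML_TYPE_TMAC_W4G128_1:
        case GGML_TYPE_Q4_0:
            return 4;   // 4-bit weights
        default:
            return 0;
    }
}
```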
@@ -495,7 +481,7 @@ size_t ggml_backend_tmac_desired_wsize(const struct ggml_tensor * dst) {
     const size_t n = src0->ne[1]; // llama.cpp n
     const size_t k = src1->ne[0]; // k
     const size_t m = src1->ne[1]; // llama.cpp m
-    const int bits = ggml_tmac_get_type_bits(src0->type);
+    const int bits = get_type_bits(src0->type);

     struct tmac_kernel_config * kernel_config = find_tmac_kernel_config(n, k, bits);
     if (kernel_config == nullptr) {
@@ -514,7 +500,7 @@ size_t ggml_backend_tmac_desired_wsize(const struct ggml_tensor * dst) {
 }

 size_t ggml_tmac_get_nbytes(const struct ggml_tensor * tensor) {
-    const int bits = ggml_tmac_get_type_bits(tensor->type);
+    const int bits = get_type_bits(tensor->type);

     int k = tensor->ne[0];
     int m = tensor->ne[1]; // `n` in llama.cpp
@@ -529,7 +515,6 @@ size_t ggml_tmac_get_nbytes(const struct ggml_tensor * tensor) {
     // Currently, always uses float to store scales or zero points
     size_t nbytes = k * m / 8 * bits + scales_size * sizeof(float);
     nbytes = GGML_PAD(nbytes, GGUF_DEFAULT_ALIGNMENT);
-    // printf("ggml_tmac_get_nbytes: %s --- k=%d, m=%d, w=%d, sc=%d, nbytes: %zu\n", tensor->name, k, m, k * m / 8 * bits, scales_size, nbytes);
     return nbytes;
 }

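To make the size formula above concrete, here is a self-contained example with hypothetical dimensions: a 4096 x 4096 weight tensor at 4 bits, assuming one float scale per 32-weight group (the real scales_size comes from ggml_tmac_get_scales_size and the kernel config):

```cpp
#include <cstdio>

int main() {
    const size_t k = 4096, m = 4096, bits = 4;      // hypothetical Q4_0-style weight tensor
    const size_t scales_size = k * m / 32;          // assumed: one float scale per 32-weight group
    size_t nbytes = k * m / 8 * bits                // 8388608 bytes of bit-packed weights
                  + scales_size * sizeof(float);    // + 2097152 bytes of float scales
    // GGML_PAD would round nbytes up to GGUF_DEFAULT_ALIGNMENT; 10485760 is already aligned.
    printf("nbytes = %zu\n", nbytes);
    return 0;
}
```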
@@ -727,7 +712,7 @@ static inline void ggml_tmac_transform_tensor(struct ggml_tensor * tensor, const
         return;
     }

-    const int bits = ggml_tmac_get_type_bits(tensor->type);
+    const int bits = get_type_bits(tensor->type);
     int k = tensor->ne[0];
     int m = tensor->ne[1]; // `n` in llama.cpp

@@ -1087,7 +1072,7 @@ void ggml_backend_tmac_mul_mat(const struct ggml_compute_params * params, struct
     GGML_ASSERT(nb1 <= nb2);
     GGML_ASSERT(nb2 <= nb3);

-    const int bits = ggml_tmac_get_type_bits(src0->type);
+    const int bits = get_type_bits(src0->type);
     // src0: weight, ne00 = k, ne01 = n
     // src1: activation, ne10 = k, ne11 = m
     char * wdata = (char *) (params->wdata);

ggml/src/ggml-cpu/tmac/tmac.cpp

Lines changed: 2 additions & 2 deletions
@@ -135,8 +135,8 @@ static size_t ggml_backend_tmac_buffer_type_get_alignment(ggml_backend_buffer_ty

 static size_t ggml_backend_tmac_buffer_type_get_alloc_size(ggml_backend_buffer_type_t buft, const struct ggml_tensor * tensor) {
     // T-MAC version of ggml_nbytes
-    if(is_tmac_type(tensor->type)){
-      return ggml_tmac_get_nbytes(tensor);
+    if (is_tmac_type(tensor->type)){
+        return ggml_tmac_get_nbytes(tensor);
     }

     return ggml_nbytes(tensor);
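
The hunk itself only adjusts formatting, but this override is why ggml_tmac_get_nbytes above matters: the T-MAC repacked layout (bit-packed weights plus float scales) generally occupies a different number of bytes than the stock ggml layout, so the allocator must ask the T-MAC size function for T-MAC tensors. A rough comparison for a single 32-weight Q4_0 block, again assuming one float scale per group:

```cpp
#include <cstdio>

int main() {
    // Stock ggml Q4_0 block: one fp16 scale + 16 bytes of 4-bit quants per 32 weights.
    const size_t ggml_q4_0_block = 2 + 16;                  // 18 bytes
    // T-MAC formula from ggml_tmac_get_nbytes, per 32-weight group (scale count assumed).
    const size_t tmac_group = 32 * 4 / 8 + sizeof(float);   // 20 bytes
    printf("ggml Q4_0: %zu B, T-MAC repacked: %zu B per 32 weights\n",
           ggml_q4_0_block, tmac_group);
    return 0;
}
```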
