@@ -146,6 +146,11 @@ static inline int get_type_group_size(enum ggml_type type) {
         case GGML_TYPE_TMAC_W4G128_0:
         case GGML_TYPE_TMAC_W4G128_1:
             return 128;
+        case GGML_TYPE_Q4_0:
+            return 32;
+        case GGML_TYPE_TQ1_0:
+        case GGML_TYPE_TQ2_0:
+            return 256;
         default:
             return 0;
     }
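
(Note, not part of the diff: the group size returned above is the number of weights that share one quantization scale; for the ggml block formats added here it matches the format's block size, 32 weights per Q4_0 block and 256 per TQ1_0/TQ2_0 block. A minimal sketch of how a caller could turn it into a per-tensor scale count, assuming one scale per group along a k x m weight and ignoring the `one_scale` path; the helper name below is hypothetical:)

```cpp
// Sketch only: derives a scale count from the group size added above.
static inline int scales_for_tensor(enum ggml_type type, int k, int m) {
    const int group_size = get_type_group_size(type);
    if (group_size == 0) {
        return 0;  // type not handled by this backend
    }
    return m * (k / group_size);  // one scale per group of weights along k
}
```
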
@@ -164,6 +169,10 @@ static inline bool get_type_has_zero_point(enum ggml_type type) {
         case GGML_TYPE_TMAC_W2G128_1:
         case GGML_TYPE_TMAC_W4G128_1:
             return true;
+        case GGML_TYPE_Q4_0:
+        case GGML_TYPE_TQ1_0:
+        case GGML_TYPE_TQ2_0:
+            return false;
         default:
             return false;
     }
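
(Aside: `has_zero_point` here means a stored per-group zero point. Q4_0, TQ1_0, and TQ2_0 carry only scales — Q4_0's offset of 8 is implicit in the format — so the explicit `false` cases document that directly instead of falling through to `default`. A hedged sketch of the two dequantization shapes, with hypothetical per-group values:)

```cpp
// Illustration only; w_q, scale, and zero_point are hypothetical inputs.
static inline float dequant_symmetric(int w_q, float scale) {
    return w_q * scale;                 // no stored zero point
}
static inline float dequant_asymmetric(int w_q, float scale, float zero_point) {
    return (w_q - zero_point) * scale;  // stored zero point (the *_1 variants)
}
```
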
@@ -178,29 +187,6 @@ static inline bool get_type_is_one_scale(enum ggml_type type) {
     }
 }
 
-static inline int ggml_tmac_get_type_bits(enum ggml_type type) {
-    switch (type) {
-        case GGML_TYPE_TMAC_BN_0:
-        case GGML_TYPE_TMAC_W2G64_0:
-        case GGML_TYPE_TMAC_W2G64_1:
-        case GGML_TYPE_TMAC_W2G128_0:
-        case GGML_TYPE_TMAC_W2G128_1:
-            return 2;
-        case GGML_TYPE_TMAC_W4G64_0:
-        case GGML_TYPE_TMAC_W4G64_1:
-        case GGML_TYPE_TMAC_W4G128_0:
-        case GGML_TYPE_TMAC_W4G128_1:
-            return 4;
-        case GGML_TYPE_Q4_0:
-            return 4;
-        case GGML_TYPE_TQ1_0:
-        case GGML_TYPE_TQ2_0:
-            return 2;
-        default:
-            return 0;
-    }
-}
-
 static inline int ggml_tmac_get_scales_size(const struct tmac_kernel_config * kernel_config, int m, int k) {
     int scales_size;
     if (kernel_config->one_scale) {
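
(Note: the deleted T-MAC-local `ggml_tmac_get_type_bits` is replaced at every call site below by `get_type_bits`, whose definition this diff does not show. Presumably it is a shared helper covering at least the same types; a reconstruction of the expected behavior from the removed switch, with the duplicate return arms merged — the real helper may handle more types:)

```cpp
// Reconstruction based on the deleted ggml_tmac_get_type_bits(); an
// assumption, not the actual definition of the shared helper.
static inline int get_type_bits(enum ggml_type type) {
    switch (type) {
        case GGML_TYPE_TMAC_BN_0:
        case GGML_TYPE_TMAC_W2G64_0:
        case GGML_TYPE_TMAC_W2G64_1:
        case GGML_TYPE_TMAC_W2G128_0:
        case GGML_TYPE_TMAC_W2G128_1:
        case GGML_TYPE_TQ1_0:  // ternary types are treated as 2-bit here
        case GGML_TYPE_TQ2_0:
            return 2;
        case GGML_TYPE_TMAC_W4G64_0:
        case GGML_TYPE_TMAC_W4G64_1:
        case GGML_TYPE_TMAC_W4G128_0:
        case GGML_TYPE_TMAC_W4G128_1:
        case GGML_TYPE_Q4_0:
            return 4;
        default:
            return 0;
    }
}
```
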
@@ -495,7 +481,7 @@ size_t ggml_backend_tmac_desired_wsize(const struct ggml_tensor * dst) {
     const size_t n = src0->ne[1];  // llama.cpp n
     const size_t k = src1->ne[0];  // k
     const size_t m = src1->ne[1];  // llama.cpp m
-    const int bits = ggml_tmac_get_type_bits(src0->type);
+    const int bits = get_type_bits(src0->type);
 
     struct tmac_kernel_config * kernel_config = find_tmac_kernel_config(n, k, bits);
     if (kernel_config == nullptr) {
@@ -514,7 +500,7 @@ size_t ggml_backend_tmac_desired_wsize(const struct ggml_tensor * dst) {
 }
 
 size_t ggml_tmac_get_nbytes(const struct ggml_tensor * tensor) {
-    const int bits = ggml_tmac_get_type_bits(tensor->type);
+    const int bits = get_type_bits(tensor->type);
 
     int k = tensor->ne[0];
     int m = tensor->ne[1];  // `n` in llama.cpp
@@ -529,7 +515,6 @@ size_t ggml_tmac_get_nbytes(const struct ggml_tensor * tensor) {
     // Currently, always uses float to store scales or zero points
     size_t nbytes = k * m / 8 * bits + scales_size * sizeof(float);
     nbytes = GGML_PAD(nbytes, GGUF_DEFAULT_ALIGNMENT);
-    // printf("ggml_tmac_get_nbytes: %s --- k=%d, m=%d, w=%d, sc=%d, nbytes: %zu\n", tensor->name, k, m, k * m / 8 * bits, scales_size, nbytes);
     return nbytes;
 }
 
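
(Worked example with illustrative numbers: for a Q4_0 tensor with k = m = 4096 and `bits` = 4, and assuming one float scale per 32-weight group to match the group size added above, the packed weights take 4096 * 4096 / 8 * 4 = 8 MiB and the scales 524288 * 4 B = 2 MiB, for 10 MiB total — already a multiple of the default 32-byte GGUF alignment, so the padding is a no-op here:)

```cpp
// Back-of-the-envelope check of the nbytes formula; the shapes and the
// one-scale-per-group assumption are illustrative, not from the diff.
#include <cstdio>

int main() {
    const size_t k = 4096, m = 4096, bits = 4, group_size = 32;
    const size_t weight_bytes = k * m / 8 * bits;    // 8388608 bytes packed
    const size_t scales_size  = k * m / group_size;  // 524288 float scales
    const size_t nbytes = weight_bytes + scales_size * sizeof(float);
    printf("%zu\n", nbytes);                         // 10485760
    return 0;
}
```
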
@@ -727,7 +712,7 @@ static inline void ggml_tmac_transform_tensor(struct ggml_tensor * tensor, const
         return;
     }
 
-    const int bits = ggml_tmac_get_type_bits(tensor->type);
+    const int bits = get_type_bits(tensor->type);
     int k = tensor->ne[0];
     int m = tensor->ne[1];  // `n` in llama.cpp
 
@@ -1087,7 +1072,7 @@ void ggml_backend_tmac_mul_mat(const struct ggml_compute_params * params, struct
     GGML_ASSERT(nb1 <= nb2);
     GGML_ASSERT(nb2 <= nb3);
 
-    const int bits = ggml_tmac_get_type_bits(src0->type);
+    const int bits = get_type_bits(src0->type);
     // src0: weight, ne00 = k, ne01 = n
     // src1: activation, ne10 = k, ne11 = m
     char * wdata = (char *) (params->wdata);