@@ -500,22 +500,26 @@ size_t ggml_backend_tmac_desired_wsize(const struct ggml_tensor * dst) {
500500}
501501
502502size_t ggml_tmac_get_nbytes (const struct ggml_tensor * tensor) {
503- const int bits = get_type_bits (tensor->type );
503+ if (is_tmac_type (tensor->type )) {
504+ const int bits = get_type_bits (tensor->type );
504505
505- int k = tensor->ne [0 ];
506- int m = tensor->ne [1 ]; // `n` in llama.cpp
506+ int k = tensor->ne [0 ];
507+ int m = tensor->ne [1 ]; // `n` in llama.cpp
507508
508- struct tmac_kernel_config * kernel_config = find_tmac_kernel_config (m, k, bits);
509- if (kernel_config == nullptr ) {
510- ggml_tmac_tune_kernel_config (tensor, m, k);
511- kernel_config = find_tmac_kernel_config (m, k, bits);
512- }
509+ struct tmac_kernel_config * kernel_config = find_tmac_kernel_config (m, k, bits);
510+ if (kernel_config == nullptr ) {
511+ ggml_tmac_tune_kernel_config (tensor, m, k);
512+ kernel_config = find_tmac_kernel_config (m, k, bits);
513+ }
513514
514- const int scales_size = ggml_tmac_get_scales_size (kernel_config, m, k);
515- // Currently, always uses float to store scales or zero points
516- size_t nbytes = k * m / 8 * bits + scales_size * sizeof (float );
517- nbytes = GGML_PAD (nbytes, GGUF_DEFAULT_ALIGNMENT);
518- return nbytes;
515+ const int scales_size = ggml_tmac_get_scales_size (kernel_config, m, k);
516+ // Currently, always uses float to store scales or zero points
517+ size_t nbytes = k * m / 8 * bits + scales_size * sizeof (float );
518+ nbytes = GGML_PAD (nbytes, GGUF_DEFAULT_ALIGNMENT);
519+ return nbytes;
520+ } else {
521+ return ggml_nbytes (tensor);
522+ }
519523}
520524
521525
@@ -1030,15 +1034,15 @@ void ggml_backend_tmac_convert_weight(struct ggml_tensor * tensor, const void *
10301034// m = batch_size
10311035// n = output_dim
10321036// t-mac llama.cpp n and m swapped
1033- void ggml_tmac_mul_mat_task_init (void * src1, void * qlut, void * lut_scales, void * lut_biases, int n, int k, int m, int bits) {
1037+ static inline void ggml_tmac_mul_mat_task_init (void * src1, void * qlut, void * lut_scales, void * lut_biases, int n, int k, int m, int bits) {
10341038 struct tmac_kernel_config * kernel_config = find_tmac_kernel_config (n, k, bits);
10351039 if (kernel_config == nullptr ) {
10361040 throw std::runtime_error (" ggml_tmac_mul_mat_task_init: Failed to find kernel config for m" + std::to_string (n) + " _k" + std::to_string (k) + " _b" + std::to_string (bits));
10371041 }
10381042 lut_ctor_int8_g4 (src1, lut_scales, lut_biases, qlut, k, kernel_config);
10391043}
10401044
1041- void ggml_tmac_mul_mat_task_compute (void * src0, void * scales, void * qlut, void * lut_scales, void * lut_biases, void * dst, int n, int k, int m, int bits) {
1045+ static inline void ggml_tmac_mul_mat_task_compute (void * src0, void * scales, void * qlut, void * lut_scales, void * lut_biases, void * dst, int n, int k, int m, int bits) {
10421046 struct tmac_kernel_config * kernel_config = find_tmac_kernel_config (n, k, bits);
10431047 if (kernel_config == nullptr ) {
10441048 GGML_LOG_INFO (" Failed to find kernel config for m%d_k%d_b%d\n " , n, k, bits);
0 commit comments