@@ -413,7 +413,7 @@ static void ggml_tmac_tune_kernel_config(const struct ggml_tensor * tensor, int
413413 }
414414
415415 for (int kfactor: kfactors) {
416- if (kfactor < kernel_config.actk ) {
416+ if (( kfactor < kernel_config.actk ) || (kfactor * kernel_config. g > kernel_config. q_group_size ) ) {
417417 continue ;
418418 }
419419
@@ -455,7 +455,7 @@ static void ggml_tmac_tune_kernel_config(const struct ggml_tensor * tensor, int
455455
456456 int largest_kfactor = 0 ;
457457 for (int kfactor: kfactors) {
458- if (kfactor < kernel_config.actk ) {
458+ if (( kfactor < kernel_config.actk ) || (kfactor * kernel_config. g > kernel_config. q_group_size ) ) {
459459 continue ;
460460 }
461461 if (kfactor > largest_kfactor) {
@@ -468,8 +468,8 @@ static void ggml_tmac_tune_kernel_config(const struct ggml_tensor * tensor, int
468468
469469 // Save the results
470470 insert_or_assign_tmac_kernel_config (M, K, bits, best_kcfg);
471- GGML_LOG_INFO (" Tuned kernel config: M=%d, N=%d, K=%d, bm=%d, kfactor=%d, bits=%d, g=%d, ngroups_per_elem=%d, q_group_size=%d, act_group_size=%d\n " ,
472- M, N, K, best_kcfg.bm , best_kcfg.kfactor , bits, best_kcfg.g , best_kcfg.ngroups_per_elem , best_kcfg.q_group_size , best_kcfg.act_group_size );
471+ GGML_LOG_INFO (" Tuned kernel config: M=%d, N=%d, K=%d, bm=%d, kfactor=%d, bits=%d, actk=%d, g=%d, ngroups_per_elem=%d, q_group_size=%d, act_group_size=%d\n " ,
472+ M, N, K, best_kcfg.bm , best_kcfg.kfactor , bits, best_kcfg.actk , best_kcfg. g , best_kcfg.ngroups_per_elem , best_kcfg.q_group_size , best_kcfg.act_group_size );
473473}
474474
475475
0 commit comments