@@ -265,7 +265,6 @@ struct cmd_params {
265265 bool no_fug = false ;
266266 bool use_thp = false ;
267267 bool no_ooae = false ;
268- bool mqkv = false ;
269268 output_formats output_format;
270269 output_formats output_format_stderr;
271270};
@@ -304,7 +303,6 @@ static const cmd_params cmd_params_defaults = {
304303 /* no_fug */ false ,
305304 /* use_thp */ false ,
306305 /* no_ooae */ false ,
307- /* mqkv */ false ,
308306 /* output_format */ MARKDOWN,
309307 /* output_format_stderr */ NONE,
310308};
@@ -344,7 +342,6 @@ static void print_usage(int /* argc */, char ** argv) {
344342 printf (" -v, --verbose (default: %s)\n " , cmd_params_defaults.verbose ? " 1" : " 0" );
345343 printf (" -w, --warmup <0|1> (default: %s)\n " , cmd_params_defaults.warmup ? " 1" : " 0" );
346344 printf (" -rtr, --run-time-repack <0|1> (default: %s)\n " , cmd_params_defaults.repack ? " 1" : " 0" );
347- printf (" -mqkv, --merge-qkv (default: %s)\n " , cmd_params_defaults.mqkv ? " 1" : " 0" );
348345 printf (" -thp, --transparent-huge-pages <0|1> (default: %s)\n " , cmd_params_defaults.use_thp ? " 1" : " 0" );
349346 printf (" -ot, --override-tensor pattern (default: none)\n " );
350347 printf (" -fmoe, --fused-moe <0|1> (default: %s)\n " , cmd_params_defaults.fmoe ? " 1" : " 0" );
@@ -736,12 +733,6 @@ static cmd_params parse_cmd_params(int argc, char ** argv) {
736733 break ;
737734 }
738735 params.repack = std::stoi (argv[i]);
739- } else if (arg == " -mqkv" || arg == " --merge-qkv" ) {
740- if (++i >= argc) {
741- invalid_param = true ;
742- break ;
743- }
744- params.mqkv = std::stoi (argv[i]);
745736 } else if (arg == " -thp" || arg == " --transparent-huge-pages" ) {
746737 if (++i >= argc) {
747738 invalid_param = true ;
@@ -860,7 +851,6 @@ struct cmd_params_instance {
860851 bool no_fug = false ;
861852 bool use_thp = false ;
862853 bool no_ooae = false ;
863- bool mqkv = false ;
864854 const llama_model_tensor_buft_override* buft_overrides;
865855
866856 llama_model_params to_llama_mparams () const {
@@ -876,7 +866,6 @@ struct cmd_params_instance {
876866 mparams.use_mmap = use_mmap;
877867 mparams.repack_tensors = repack;
878868 mparams.use_thp = use_thp;
879- mparams.merge_qkv = mqkv;
880869 mparams.tensor_buft_overrides = buft_overrides;
881870
882871 return mparams;
@@ -890,7 +879,6 @@ struct cmd_params_instance {
890879 main_gpu == other.main_gpu &&
891880 use_mmap == other.use_mmap &&
892881 repack == other.repack &&
893- mqkv == other.mqkv &&
894882 use_thp == other.use_thp &&
895883 tensor_split == other.tensor_split ;
896884 }
@@ -973,7 +961,6 @@ static std::vector<cmd_params_instance> get_cmd_params_instances(const cmd_param
973961 /* .no_fug = */ params.no_fug ,
974962 /* .use_thp = */ params.use_thp ,
975963 /* .no_ooae = */ params.no_ooae ,
976- /* .mqkv = */ params.mqkv ,
977964 /* .buft_overrides=*/ params.buft_overrides .data (),
978965 };
979966 instances.push_back (instance);
@@ -1011,7 +998,6 @@ static std::vector<cmd_params_instance> get_cmd_params_instances(const cmd_param
1011998 /* .no_fug = */ params.no_fug ,
1012999 /* .use_thp = */ params.use_thp ,
10131000 /* .no_ooae = */ params.no_ooae ,
1014- /* .mqkv = */ params.mqkv ,
10151001 /* .buft_overrides=*/ params.buft_overrides .data (),
10161002 };
10171003 instances.push_back (instance);
@@ -1049,7 +1035,6 @@ static std::vector<cmd_params_instance> get_cmd_params_instances(const cmd_param
10491035 /* .no_fug = */ params.no_fug ,
10501036 /* .use_thp = */ params.use_thp ,
10511037 /* .no_ooae = */ params.no_ooae ,
1052- /* .mqkv = */ params.mqkv ,
10531038 /* .buft_overrides=*/ params.buft_overrides .data (),
10541039 };
10551040 instances.push_back (instance);
@@ -1086,8 +1071,7 @@ static std::vector<cmd_params_instance> get_cmd_params_instances(const cmd_param
10861071 /* .ger = */ params.ger ,
10871072 /* .no_fug = */ params.no_fug ,
10881073 /* .use_thp = */ params.use_thp ,
1089- /* .no_ooae = */ params.no_ooae ,
1090- /* .mqkv = */ params.mqkv ,
1074+ /* .no_ooae = */ params.no_ooae ,
10911075 /* .buft_overrides=*/ params.buft_overrides .data (),
10921076 };
10931077 instances.push_back (instance);
@@ -1136,7 +1120,6 @@ struct test {
11361120 bool no_fug = false ;
11371121 bool use_thp = false ;
11381122 bool no_ooae = false ;
1139- bool mqkv = false ;
11401123 int n_prompt;
11411124 int n_gen;
11421125 std::string test_time;
@@ -1169,7 +1152,6 @@ struct test {
11691152 use_mmap = inst.use_mmap ;
11701153 embeddings = inst.embeddings ;
11711154 repack = inst.repack ;
1172- mqkv = inst.mqkv ;
11731155 fmoe = inst.fmoe ;
11741156 ger = inst.ger ;
11751157 no_fug = inst.no_fug ;
@@ -1265,7 +1247,7 @@ struct test {
12651247 " n_threads" , " type_k" , " type_v" ,
12661248 " n_gpu_layers" , " split_mode" ,
12671249 " main_gpu" , " no_kv_offload" , " flash_attn" , " mla_attn" , " attn_max_batch" , " ser" ,
1268- " tensor_split" , " use_mmap" , " embeddings" , " repack" , " mqkv " , " fused_moe" , " grouped_er" , " fused_up_gate" , " use_thp" , " ooae" ,
1250+ " tensor_split" , " use_mmap" , " embeddings" , " repack" , " fused_moe" , " grouped_er" , " fused_up_gate" , " use_thp" , " ooae" ,
12691251 " n_prompt" , " n_gen" , " test_time" ,
12701252 " avg_ns" , " stddev_ns" ,
12711253 " avg_ts" , " stddev_ts" , " test" ,
@@ -1287,7 +1269,7 @@ struct test {
12871269 if (field == " cuda" || field == " vulkan" || field == " kompute" || field == " metal" ||
12881270 field == " gpu_blas" || field == " blas" || field == " sycl" ||field == " f16_kv" || field == " no_kv_offload" ||
12891271 field == " flash_attn" || field == " use_mmap" || field == " embeddings" || field == " repack" || field == " use_thp" ||
1290- field == " fused_moe" || field == " grouped_er" || field == " fused_up_gate" || field == " ooae" || field == " mqkv " ) {
1272+ field == " fused_moe" || field == " grouped_er" || field == " fused_up_gate" || field == " ooae" ) {
12911273 return BOOL;
12921274 }
12931275 if (field == " avg_ts" || field == " stddev_ts" ) {
@@ -1331,7 +1313,7 @@ struct test {
13311313 std::to_string (mla_attn), std::to_string (attn_max_batch), ser_to_string (ser),
13321314 tensor_split_str, std::to_string (use_mmap), std::to_string (embeddings),
13331315 std::to_string (repack), std::to_string (fmoe), std::to_string (ger),
1334- std::to_string (no_fug), std::to_string (use_thp), std::to_string (no_ooae), std::to_string (mqkv),
1316+ std::to_string (no_fug), std::to_string (use_thp), std::to_string (no_ooae),
13351317 std::to_string (n_prompt), std::to_string (n_gen), test_time,
13361318 std::to_string (avg_ns ()), std::to_string (stdev_ns ()),
13371319 std::to_string (avg_ts ()), std::to_string (stdev_ts ()),
@@ -1509,9 +1491,6 @@ struct markdown_printer : public printer {
15091491 if (field == " repack" ) {
15101492 return 3 ;
15111493 }
1512- if (field == " mqkv" ) {
1513- return 4 ;
1514- }
15151494 if (field == " use_thp" ) {
15161495 return 3 ;
15171496 }
@@ -1570,9 +1549,6 @@ struct markdown_printer : public printer {
15701549 if (field == " repack" ) {
15711550 return " rtr" ;
15721551 }
1573- if (field == " mqkv" ) {
1574- return " mqkv" ;
1575- }
15761552 if (field == " use_thp" ) {
15771553 return " thp" ;
15781554 }
@@ -1658,9 +1634,6 @@ struct markdown_printer : public printer {
16581634 if (params.repack != cmd_params_defaults.repack ) {
16591635 fields.emplace_back (" repack" );
16601636 }
1661- if (params.mqkv != cmd_params_defaults.mqkv ) {
1662- fields.emplace_back (" mqkv" );
1663- }
16641637 if (params.use_thp != cmd_params_defaults.use_thp ) {
16651638 fields.emplace_back (" use_thp" );
16661639 }
0 commit comments