@@ -357,6 +357,7 @@ struct cmd_params {
357357 std::vector<bool > use_mmap;
358358 std::vector<bool > embeddings;
359359 std::vector<bool > no_op_offload;
360+ std::vector<bool > no_host;
360361 ggml_numa_strategy numa;
361362 int reps;
362363 ggml_sched_priority prio;
@@ -394,6 +395,7 @@ static const cmd_params cmd_params_defaults = {
394395 /* use_mmap */ { true },
395396 /* embeddings */ { false },
396397 /* no_op_offload */ { false },
398+ /* no_host */ { false },
397399 /* numa */ GGML_NUMA_STRATEGY_DISABLED,
398400 /* reps */ 5 ,
399401 /* prio */ GGML_SCHED_PRIO_NORMAL,
@@ -474,6 +476,8 @@ static void print_usage(int /* argc */, char ** argv) {
474476 printf (" -ot --override-tensor <tensor name pattern>=<buffer type>;...\n " );
475477 printf (" (default: disabled)\n " );
476478 printf (" -nopo, --no-op-offload <0|1> (default: 0)\n " );
479+ printf (" --no-host <0|1> (default: %s)\n " ,
480+ join (cmd_params_defaults.no_host , " ," ).c_str ());
477481 printf (" \n " );
478482 printf (
479483 " Multiple values can be given for each parameter by separating them with ','\n "
@@ -803,6 +807,13 @@ static cmd_params parse_cmd_params(int argc, char ** argv) {
803807 }
804808 auto p = string_split<bool >(argv[i], split_delim);
805809 params.no_op_offload .insert (params.no_op_offload .end (), p.begin (), p.end ());
810+ } else if (arg == " --no-host" ) {
811+ if (++i >= argc) {
812+ invalid_param = true ;
813+ break ;
814+ }
815+ auto p = string_split<bool >(argv[i], split_delim);
816+ params.no_host .insert (params.no_host .end (), p.begin (), p.end ());
806817 } else if (arg == " -ts" || arg == " --tensor-split" ) {
807818 if (++i >= argc) {
808819 invalid_param = true ;
@@ -1024,6 +1035,9 @@ static cmd_params parse_cmd_params(int argc, char ** argv) {
10241035 if (params.no_op_offload .empty ()) {
10251036 params.no_op_offload = cmd_params_defaults.no_op_offload ;
10261037 }
1038+ if (params.no_host .empty ()) {
1039+ params.no_host = cmd_params_defaults.no_host ;
1040+ }
10271041 if (params.n_threads .empty ()) {
10281042 params.n_threads = cmd_params_defaults.n_threads ;
10291043 }
@@ -1065,6 +1079,7 @@ struct cmd_params_instance {
10651079 bool use_mmap;
10661080 bool embeddings;
10671081 bool no_op_offload;
1082+ bool no_host;
10681083
10691084 llama_model_params to_llama_mparams () const {
10701085 llama_model_params mparams = llama_model_default_params ();
@@ -1077,6 +1092,7 @@ struct cmd_params_instance {
10771092 mparams.main_gpu = main_gpu;
10781093 mparams.tensor_split = tensor_split.data ();
10791094 mparams.use_mmap = use_mmap;
1095+ mparams.no_host = no_host;
10801096
10811097 if (n_cpu_moe <= 0 ) {
10821098 if (tensor_buft_overrides.empty ()) {
@@ -1159,6 +1175,7 @@ static std::vector<cmd_params_instance> get_cmd_params_instances(const cmd_param
11591175 for (const auto & mmp : params.use_mmap )
11601176 for (const auto & embd : params.embeddings )
11611177 for (const auto & nopo : params.no_op_offload )
1178+ for (const auto & noh : params.no_host )
11621179 for (const auto & nb : params.n_batch )
11631180 for (const auto & nub : params.n_ubatch )
11641181 for (const auto & tk : params.type_k )
@@ -1199,6 +1216,7 @@ static std::vector<cmd_params_instance> get_cmd_params_instances(const cmd_param
11991216 /* .use_mmap = */ mmp,
12001217 /* .embeddings = */ embd,
12011218 /* .no_op_offload= */ nopo,
1219+ /* .no_host = */ noh,
12021220 };
12031221 instances.push_back (instance);
12041222 }
@@ -1232,6 +1250,7 @@ static std::vector<cmd_params_instance> get_cmd_params_instances(const cmd_param
12321250 /* .use_mmap = */ mmp,
12331251 /* .embeddings = */ embd,
12341252 /* .no_op_offload= */ nopo,
1253+ /* .no_host = */ noh,
12351254 };
12361255 instances.push_back (instance);
12371256 }
@@ -1265,6 +1284,7 @@ static std::vector<cmd_params_instance> get_cmd_params_instances(const cmd_param
12651284 /* .use_mmap = */ mmp,
12661285 /* .embeddings = */ embd,
12671286 /* .no_op_offload= */ nopo,
1287+ /* .no_host = */ noh,
12681288 };
12691289 instances.push_back (instance);
12701290 }
@@ -1303,6 +1323,7 @@ struct test {
13031323 bool use_mmap;
13041324 bool embeddings;
13051325 bool no_op_offload;
1326+ bool no_host;
13061327 int n_prompt;
13071328 int n_gen;
13081329 int n_depth;
@@ -1339,6 +1360,7 @@ struct test {
13391360 use_mmap = inst.use_mmap ;
13401361 embeddings = inst.embeddings ;
13411362 no_op_offload = inst.no_op_offload ;
1363+ no_host = inst.no_host ;
13421364 n_prompt = inst.n_prompt ;
13431365 n_gen = inst.n_gen ;
13441366 n_depth = inst.n_depth ;
@@ -1386,8 +1408,8 @@ struct test {
13861408 " type_k" , " type_v" , " n_gpu_layers" , " n_cpu_moe" , " split_mode" ,
13871409 " main_gpu" , " no_kv_offload" , " flash_attn" , " devices" , " tensor_split" ,
13881410 " tensor_buft_overrides" , " use_mmap" , " embeddings" , " no_op_offload" ,
1389- " n_prompt " , " n_gen " , " n_depth " , " test_time " , " avg_ns " ,
1390- " stddev_ns" , " avg_ts" , " stddev_ts"
1411+ " no_host " , " n_prompt " , " n_gen " , " n_depth " , " test_time " ,
1412+ " avg_ns " , " stddev_ns" , " avg_ts" , " stddev_ts"
13911413 };
13921414 return fields;
13931415 }
@@ -1402,7 +1424,7 @@ struct test {
14021424 return INT;
14031425 }
14041426 if (field == " f16_kv" || field == " no_kv_offload" || field == " cpu_strict" || field == " flash_attn" ||
1405- field == " use_mmap" || field == " embeddings" ) {
1427+ field == " use_mmap" || field == " embeddings" || field == " no_host " ) {
14061428 return BOOL;
14071429 }
14081430 if (field == " avg_ts" || field == " stddev_ts" ) {
@@ -1477,6 +1499,7 @@ struct test {
14771499 std::to_string (use_mmap),
14781500 std::to_string (embeddings),
14791501 std::to_string (no_op_offload),
1502+ std::to_string (no_host),
14801503 std::to_string (n_prompt),
14811504 std::to_string (n_gen),
14821505 std::to_string (n_depth),
@@ -1665,6 +1688,9 @@ struct markdown_printer : public printer {
16651688 if (field == " no_op_offload" ) {
16661689 return 4 ;
16671690 }
1691+ if (field == " no_host" ) {
1692+ return 4 ;
1693+ }
16681694
16691695 int width = std::max ((int ) field.length (), 10 );
16701696
@@ -1699,6 +1725,9 @@ struct markdown_printer : public printer {
16991725 if (field == " no_op_offload" ) {
17001726 return " nopo" ;
17011727 }
1728+ if (field == " no_host" ) {
1729+ return " noh" ;
1730+ }
17021731 if (field == " devices" ) {
17031732 return " dev" ;
17041733 }
@@ -1779,6 +1808,9 @@ struct markdown_printer : public printer {
17791808 if (params.no_op_offload .size () > 1 || params.no_op_offload != cmd_params_defaults.no_op_offload ) {
17801809 fields.emplace_back (" no_op_offload" );
17811810 }
1811+ if (params.no_host .size () > 1 || params.no_host != cmd_params_defaults.no_host ) {
1812+ fields.emplace_back (" no_host" );
1813+ }
17821814 fields.emplace_back (" test" );
17831815 fields.emplace_back (" t/s" );
17841816
0 commit comments