@@ -219,7 +219,7 @@ struct cmd_params {
219219 std::vector<std::vector<llama_model_tensor_buft_override>> tensor_buft_overrides;
220220 std::vector<bool > use_mmap;
221221 std::vector<bool > embeddings;
222- std::vector<bool > disable_op_offload ;
222+ std::vector<bool > no_op_offload ;
223223 ggml_numa_strategy numa;
224224 int reps;
225225 ggml_sched_priority prio;
@@ -254,7 +254,7 @@ static const cmd_params cmd_params_defaults = {
254254 /* tensor_buft_overrides*/ { std::vector<llama_model_tensor_buft_override>{{nullptr ,nullptr }} },
255255 /* use_mmap */ { true },
256256 /* embeddings */ { false },
257- /* disable_op_offload */ { false },
257+ /* no_op_offload */ { false },
258258 /* numa */ GGML_NUMA_STRATEGY_DISABLED,
259259 /* reps */ 5 ,
260260 /* prio */ GGML_SCHED_PRIO_NORMAL,
@@ -313,7 +313,7 @@ static void print_usage(int /* argc */, char ** argv) {
313313 join (cmd_params_defaults.embeddings , " ," ).c_str ());
314314 printf (" -ts, --tensor-split <ts0/ts1/..> (default: 0)\n " );
315315 printf (" -ot --override-tensors <tensor name pattern>=<buffer type>;... (default: disabled)\n " );
316- printf (" -dopo , --disable -op-offload <i> (default: 0)\n " );
316+ printf (" -nopo , --no -op-offload <i> (default: 0)\n " );
317317 printf (" -r, --repetitions <n> (default: %d)\n " , cmd_params_defaults.reps );
318318 printf (" --prio <0|1|2|3> (default: %d)\n " , cmd_params_defaults.prio );
319319 printf (" --delay <0...N> (seconds) (default: %d)\n " , cmd_params_defaults.delay );
@@ -591,13 +591,13 @@ static cmd_params parse_cmd_params(int argc, char ** argv) {
591591 }
592592 auto p = string_split<bool >(argv[i], split_delim);
593593 params.embeddings .insert (params.embeddings .end (), p.begin (), p.end ());
594- } else if (arg == " -dopo " || arg == " --disable -op-offload" ) {
594+ } else if (arg == " -nopo " || arg == " --no -op-offload" ) {
595595 if (++i >= argc) {
596596 invalid_param = true ;
597597 break ;
598598 }
599599 auto p = string_split<bool >(argv[i], split_delim);
600- params.disable_op_offload .insert (params.disable_op_offload .end (), p.begin (), p.end ());
600+ params.no_op_offload .insert (params.no_op_offload .end (), p.begin (), p.end ());
601601 } else if (arg == " -ts" || arg == " --tensor-split" ) {
602602 if (++i >= argc) {
603603 invalid_param = true ;
@@ -804,8 +804,8 @@ static cmd_params parse_cmd_params(int argc, char ** argv) {
804804 if (params.embeddings .empty ()) {
805805 params.embeddings = cmd_params_defaults.embeddings ;
806806 }
807- if (params.disable_op_offload .empty ()) {
808- params.disable_op_offload = cmd_params_defaults.disable_op_offload ;
807+ if (params.no_op_offload .empty ()) {
808+ params.no_op_offload = cmd_params_defaults.no_op_offload ;
809809 }
810810 if (params.n_threads .empty ()) {
811811 params.n_threads = cmd_params_defaults.n_threads ;
@@ -846,7 +846,7 @@ struct cmd_params_instance {
846846 std::vector<llama_model_tensor_buft_override> tensor_buft_overrides;
847847 bool use_mmap;
848848 bool embeddings;
849- bool disable_op_offload ;
849+ bool no_op_offload ;
850850
851851 llama_model_params to_llama_mparams () const {
852852 llama_model_params mparams = llama_model_default_params ();
@@ -916,7 +916,7 @@ struct cmd_params_instance {
916916 cparams.offload_kqv = !no_kv_offload;
917917 cparams.flash_attn = flash_attn;
918918 cparams.embeddings = embeddings;
919- cparams.op_offload = !disable_op_offload ;
919+ cparams.op_offload = !no_op_offload ;
920920
921921 return cparams;
922922 }
@@ -936,7 +936,7 @@ static std::vector<cmd_params_instance> get_cmd_params_instances(const cmd_param
936936 for (const auto & ot : params.tensor_buft_overrides )
937937 for (const auto & mmp : params.use_mmap )
938938 for (const auto & embd : params.embeddings )
939- for (const auto & dopo : params.disable_op_offload )
939+ for (const auto & nopo : params.no_op_offload )
940940 for (const auto & nb : params.n_batch )
941941 for (const auto & nub : params.n_ubatch )
942942 for (const auto & tk : params.type_k )
@@ -975,7 +975,7 @@ static std::vector<cmd_params_instance> get_cmd_params_instances(const cmd_param
975975 /* .tensor_buft_overrides = */ ot,
976976 /* .use_mmap = */ mmp,
977977 /* .embeddings = */ embd,
978- /* .disable_op_offload = */ dopo ,
978+ /* .no_op_offload = */ nopo ,
979979 };
980980 instances.push_back (instance);
981981 }
@@ -1007,7 +1007,7 @@ static std::vector<cmd_params_instance> get_cmd_params_instances(const cmd_param
10071007 /* .tensor_buft_overrides = */ ot,
10081008 /* .use_mmap = */ mmp,
10091009 /* .embeddings = */ embd,
1010- /* .disable_op_offload = */ dopo ,
1010+ /* .no_op_offload = */ nopo ,
10111011 };
10121012 instances.push_back (instance);
10131013 }
@@ -1039,7 +1039,7 @@ static std::vector<cmd_params_instance> get_cmd_params_instances(const cmd_param
10391039 /* .tensor_buft_overrides = */ ot,
10401040 /* .use_mmap = */ mmp,
10411041 /* .embeddings = */ embd,
1042- /* .disable_op_offload = */ dopo ,
1042+ /* .no_op_offload = */ nopo ,
10431043 };
10441044 instances.push_back (instance);
10451045 }
@@ -1075,7 +1075,7 @@ struct test {
10751075 std::vector<llama_model_tensor_buft_override> tensor_buft_overrides;
10761076 bool use_mmap;
10771077 bool embeddings;
1078- bool disable_op_offload ;
1078+ bool no_op_offload ;
10791079 int n_prompt;
10801080 int n_gen;
10811081 int n_depth;
@@ -1109,7 +1109,7 @@ struct test {
11091109 tensor_buft_overrides = inst.tensor_buft_overrides ;
11101110 use_mmap = inst.use_mmap ;
11111111 embeddings = inst.embeddings ;
1112- disable_op_offload = inst.disable_op_offload ;
1112+ no_op_offload = inst.no_op_offload ;
11131113 n_prompt = inst.n_prompt ;
11141114 n_gen = inst.n_gen ;
11151115 n_depth = inst.n_depth ;
@@ -1155,7 +1155,7 @@ struct test {
11551155 " model_type" , " model_size" , " model_n_params" , " n_batch" , " n_ubatch" , " n_threads" ,
11561156 " cpu_mask" , " cpu_strict" , " poll" , " type_k" , " type_v" , " n_gpu_layers" ,
11571157 " split_mode" , " main_gpu" , " no_kv_offload" , " flash_attn" , " tensor_split" , " tensor_buft_overrides" ,
1158- " use_mmap" , " embeddings" , " disable_op_offload " , " n_prompt" , " n_gen" , " n_depth" , " test_time" ,
1158+ " use_mmap" , " embeddings" , " no_op_offload " , " n_prompt" , " n_gen" , " n_depth" , " test_time" ,
11591159 " avg_ns" , " stddev_ns" , " avg_ts" , " stddev_ts" ,
11601160 };
11611161 return fields;
@@ -1167,7 +1167,7 @@ struct test {
11671167 if (field == " build_number" || field == " n_batch" || field == " n_ubatch" || field == " n_threads" ||
11681168 field == " poll" || field == " model_size" || field == " model_n_params" || field == " n_gpu_layers" ||
11691169 field == " main_gpu" || field == " n_prompt" || field == " n_gen" || field == " n_depth" ||
1170- field == " avg_ns" || field == " stddev_ns" || field == " disable_op_offload " ) {
1170+ field == " avg_ns" || field == " stddev_ns" || field == " no_op_offload " ) {
11711171 return INT;
11721172 }
11731173 if (field == " f16_kv" || field == " no_kv_offload" || field == " cpu_strict" || field == " flash_attn" ||
@@ -1243,7 +1243,7 @@ struct test {
12431243 tensor_buft_overrides_str,
12441244 std::to_string (use_mmap),
12451245 std::to_string (embeddings),
1246- std::to_string (disable_op_offload ),
1246+ std::to_string (no_op_offload ),
12471247 std::to_string (n_prompt),
12481248 std::to_string (n_gen),
12491249 std::to_string (n_depth),
@@ -1426,7 +1426,7 @@ struct markdown_printer : public printer {
14261426 if (field == " test" ) {
14271427 return 15 ;
14281428 }
1429- if (field == " disable_op_offload " ) {
1429+ if (field == " no_op_offload " ) {
14301430 return 4 ;
14311431 }
14321432
@@ -1460,8 +1460,8 @@ struct markdown_printer : public printer {
14601460 if (field == " embeddings" ) {
14611461 return " embd" ;
14621462 }
1463- if (field == " disable_op_offload " ) {
1464- return " dopo " ;
1463+ if (field == " no_op_offload " ) {
1464+ return " nopo " ;
14651465 }
14661466 if (field == " tensor_split" ) {
14671467 return " ts" ;
@@ -1531,8 +1531,8 @@ struct markdown_printer : public printer {
15311531 if (params.embeddings .size () > 1 || params.embeddings != cmd_params_defaults.embeddings ) {
15321532 fields.emplace_back (" embeddings" );
15331533 }
1534- if (params.disable_op_offload .size () > 1 || params.disable_op_offload != cmd_params_defaults.disable_op_offload ) {
1535- fields.emplace_back (" disable_op_offload " );
1534+ if (params.no_op_offload .size () > 1 || params.no_op_offload != cmd_params_defaults.no_op_offload ) {
1535+ fields.emplace_back (" no_op_offload " );
15361536 }
15371537 fields.emplace_back (" test" );
15381538 fields.emplace_back (" t/s" );
0 commit comments