@@ -211,13 +211,19 @@ static std::vector<int> parse_int_range(const std::string & s) {
211211 for (int i = first; i <= last;) {
212212 result.push_back (i);
213213
214+ int prev_i = i;
215+
214216 if (op == ' +' ) {
215217 i += step;
216218 } else if (op == ' *' ) {
217219 i *= step;
218220 } else {
219221 throw std::invalid_argument (" invalid range format" );
220222 }
223+
224+ if (i <= prev_i) {
225+ throw std::invalid_argument (" invalid range" );
226+ }
221227 }
222228 search_start = match.suffix ().first ;
223229 }
@@ -239,6 +245,7 @@ struct cmd_params {
239245 std::vector<int > n_ubatch;
240246 std::vector<ggml_type> type_k;
241247 std::vector<ggml_type> type_v;
248+ std::vector<float > defrag_thold;
242249 std::vector<int > n_threads;
243250 std::vector<std::string> cpu_mask;
244251 std::vector<bool > cpu_strict;
@@ -274,6 +281,7 @@ static const cmd_params cmd_params_defaults = {
274281 /* n_ubatch */ { 512 },
275282 /* type_k */ { GGML_TYPE_F16 },
276283 /* type_v */ { GGML_TYPE_F16 },
284+ /* defrag_thold */ { -1 .0f },
277285 /* n_threads */ { cpu_get_num_math () },
278286 /* cpu_mask */ { " 0x0" },
279287 /* cpu_strict */ { false },
@@ -335,6 +343,8 @@ static void print_usage(int /* argc */, char ** argv) {
335343 join (transform_to_str (cmd_params_defaults.type_k , ggml_type_name), " ," ).c_str ());
336344 printf (" -ctv, --cache-type-v <t> (default: %s)\n " ,
337345 join (transform_to_str (cmd_params_defaults.type_v , ggml_type_name), " ," ).c_str ());
346+ printf (" -dt, --defrag-thold <f> (default: %s)\n " ,
347+ join (cmd_params_defaults.defrag_thold , " ," ).c_str ());
338348 printf (" -t, --threads <n> (default: %s)\n " ,
339349 join (cmd_params_defaults.n_threads , " ," ).c_str ());
340350 printf (" -C, --cpu-mask <hex,hex> (default: %s)\n " ,
@@ -368,7 +378,7 @@ static void print_usage(int /* argc */, char ** argv) {
368378 printf (
369379 " Multiple values can be given for each parameter by separating them with ','\n "
370380 " or by specifying the parameter multiple times. Ranges can be given as\n "
371- " 'start-end ' or 'start-end +step' or 'start-end *mult'.\n " );
381+ " 'first-last ' or 'first-last +step' or 'first-last *mult'.\n " );
372382}
373383
374384static ggml_type ggml_type_from_name (const std::string & s) {
@@ -519,6 +529,13 @@ static cmd_params parse_cmd_params(int argc, char ** argv) {
519529 break ;
520530 }
521531 params.type_v .insert (params.type_v .end (), types.begin (), types.end ());
532+ } else if (arg == " -dt" || arg == " --defrag-thold" ) {
533+ if (++i >= argc) {
534+ invalid_param = true ;
535+ break ;
536+ }
537+ auto p = string_split<float >(argv[i], split_delim);
538+ params.defrag_thold .insert (params.defrag_thold .end (), p.begin (), p.end ());
522539 } else if (arg == " -t" || arg == " --threads" ) {
523540 if (++i >= argc) {
524541 invalid_param = true ;
@@ -825,6 +842,9 @@ static cmd_params parse_cmd_params(int argc, char ** argv) {
825842 if (params.type_v .empty ()) {
826843 params.type_v = cmd_params_defaults.type_v ;
827844 }
845+ if (params.defrag_thold .empty ()) {
846+ params.defrag_thold = cmd_params_defaults.defrag_thold ;
847+ }
828848 if (params.n_gpu_layers .empty ()) {
829849 params.n_gpu_layers = cmd_params_defaults.n_gpu_layers ;
830850 }
@@ -883,6 +903,7 @@ struct cmd_params_instance {
883903 int n_ubatch;
884904 ggml_type type_k;
885905 ggml_type type_v;
906+ float defrag_thold;
886907 int n_threads;
887908 std::string cpu_mask;
888909 bool cpu_strict;
@@ -959,15 +980,16 @@ struct cmd_params_instance {
959980 llama_context_params to_llama_cparams () const {
960981 llama_context_params cparams = llama_context_default_params ();
961982
962- cparams.n_ctx = n_prompt + n_gen + n_depth;
963- cparams.n_batch = n_batch;
964- cparams.n_ubatch = n_ubatch;
965- cparams.type_k = type_k;
966- cparams.type_v = type_v;
967- cparams.offload_kqv = !no_kv_offload;
968- cparams.flash_attn = flash_attn;
969- cparams.embeddings = embeddings;
970- cparams.op_offload = !no_op_offload;
983+ cparams.n_ctx = n_prompt + n_gen + n_depth;
984+ cparams.n_batch = n_batch;
985+ cparams.n_ubatch = n_ubatch;
986+ cparams.type_k = type_k;
987+ cparams.type_v = type_v;
988+ cparams.defrag_thold = defrag_thold;
989+ cparams.offload_kqv = !no_kv_offload;
990+ cparams.flash_attn = flash_attn;
991+ cparams.embeddings = embeddings;
992+ cparams.op_offload = !no_op_offload;
971993
972994 return cparams;
973995 }
@@ -992,6 +1014,7 @@ static std::vector<cmd_params_instance> get_cmd_params_instances(const cmd_param
9921014 for (const auto & nub : params.n_ubatch )
9931015 for (const auto & tk : params.type_k )
9941016 for (const auto & tv : params.type_v )
1017+ for (const auto & defrag_thold : params.defrag_thold )
9951018 for (const auto & nkvo : params.no_kv_offload )
9961019 for (const auto & fa : params.flash_attn )
9971020 for (const auto & nt : params.n_threads )
@@ -1012,6 +1035,7 @@ static std::vector<cmd_params_instance> get_cmd_params_instances(const cmd_param
10121035 /* .n_ubatch = */ nub,
10131036 /* .type_k = */ tk,
10141037 /* .type_v = */ tv,
1038+ /* .defrag_thold = */ defrag_thold,
10151039 /* .n_threads = */ nt,
10161040 /* .cpu_mask = */ cm,
10171041 /* .cpu_strict = */ cs,
@@ -1044,6 +1068,7 @@ static std::vector<cmd_params_instance> get_cmd_params_instances(const cmd_param
10441068 /* .n_ubatch = */ nub,
10451069 /* .type_k = */ tk,
10461070 /* .type_v = */ tv,
1071+ /* .defrag_thold = */ defrag_thold,
10471072 /* .n_threads = */ nt,
10481073 /* .cpu_mask = */ cm,
10491074 /* .cpu_strict = */ cs,
@@ -1076,6 +1101,7 @@ static std::vector<cmd_params_instance> get_cmd_params_instances(const cmd_param
10761101 /* .n_ubatch = */ nub,
10771102 /* .type_k = */ tk,
10781103 /* .type_v = */ tv,
1104+ /* .defrag_thold = */ defrag_thold,
10791105 /* .n_threads = */ nt,
10801106 /* .cpu_mask = */ cm,
10811107 /* .cpu_strict = */ cs,
@@ -1117,6 +1143,7 @@ struct test {
11171143 int poll;
11181144 ggml_type type_k;
11191145 ggml_type type_v;
1146+ float defrag_thold;
11201147 int n_gpu_layers;
11211148 llama_split_mode split_mode;
11221149 int main_gpu;
@@ -1151,6 +1178,7 @@ struct test {
11511178 poll = inst.poll ;
11521179 type_k = inst.type_k ;
11531180 type_v = inst.type_v ;
1181+ defrag_thold = inst.defrag_thold ;
11541182 n_gpu_layers = inst.n_gpu_layers ;
11551183 split_mode = inst.split_mode ;
11561184 main_gpu = inst.main_gpu ;
@@ -1206,6 +1234,7 @@ struct test {
12061234 " model_type" , " model_size" , " model_n_params" , " n_batch" , " n_ubatch" , " n_threads" ,
12071235 " cpu_mask" , " cpu_strict" , " poll" , " type_k" , " type_v" , " n_gpu_layers" ,
12081236 " split_mode" , " main_gpu" , " no_kv_offload" , " flash_attn" , " tensor_split" , " tensor_buft_overrides" ,
1237+ " defrag_thold" ,
12091238 " use_mmap" , " embeddings" , " no_op_offload" , " n_prompt" , " n_gen" , " n_depth" , " test_time" ,
12101239 " avg_ns" , " stddev_ns" , " avg_ts" , " stddev_ts" ,
12111240 };
@@ -1225,7 +1254,7 @@ struct test {
12251254 field == " use_mmap" || field == " embeddings" ) {
12261255 return BOOL;
12271256 }
1228- if (field == " avg_ts" || field == " stddev_ts" ) {
1257+ if (field == " avg_ts" || field == " stddev_ts" || field == " defrag_thold " ) {
12291258 return FLOAT;
12301259 }
12311260 return STRING;
@@ -1292,6 +1321,7 @@ struct test {
12921321 std::to_string (flash_attn),
12931322 tensor_split_str,
12941323 tensor_buft_overrides_str,
1324+ std::to_string (defrag_thold),
12951325 std::to_string (use_mmap),
12961326 std::to_string (embeddings),
12971327 std::to_string (no_op_offload),
@@ -1558,6 +1588,9 @@ struct markdown_printer : public printer {
15581588 if (params.type_v .size () > 1 || params.type_v != cmd_params_defaults.type_v ) {
15591589 fields.emplace_back (" type_v" );
15601590 }
1591+ if (params.defrag_thold .size () > 1 || params.defrag_thold != cmd_params_defaults.defrag_thold ) {
1592+ fields.emplace_back (" defrag_thold" );
1593+ }
15611594 if (params.main_gpu .size () > 1 || params.main_gpu != cmd_params_defaults.main_gpu ) {
15621595 fields.emplace_back (" main_gpu" );
15631596 }
0 commit comments