@@ -261,6 +261,7 @@ struct cmd_params {
261261 std::vector<bool > use_mmap;
262262 std::vector<bool > embeddings;
263263 std::vector<bool > no_op_offload;
264+ std::vector<bool > graph_reuse;
264265 ggml_numa_strategy numa;
265266 int reps;
266267 ggml_sched_priority prio;
@@ -298,6 +299,7 @@ static const cmd_params cmd_params_defaults = {
298299 /* use_mmap */ { true },
299300 /* embeddings */ { false },
300301 /* no_op_offload */ { false },
302+ /* graph_reuse */ { false },
301303 /* numa */ GGML_NUMA_STRATEGY_DISABLED,
302304 /* reps */ 5 ,
303305 /* prio */ GGML_SCHED_PRIO_NORMAL,
@@ -377,6 +379,7 @@ static void print_usage(int /* argc */, char ** argv) {
377379 printf (" -ot --override-tensors <tensor name pattern>=<buffer type>;...\n " );
378380 printf (" (default: disabled)\n " );
379381 printf (" -nopo, --no-op-offload <0|1> (default: 0)\n " );
382+ printf (" -gr, --graph-reuse <0|1> (default: 0)\n " );
380383 printf (" \n " );
381384 printf (
382385 " Multiple values can be given for each parameter by separating them with ','\n "
@@ -620,6 +623,13 @@ static cmd_params parse_cmd_params(int argc, char ** argv) {
620623 }
621624 auto p = string_split<bool >(argv[i], split_delim);
622625 params.no_kv_offload .insert (params.no_kv_offload .end (), p.begin (), p.end ());
626+ } else if (arg == " -gr" || arg == " --graph-reuse" ) {
627+ if (++i >= argc) {
628+ invalid_param = true ;
629+ break ;
630+ }
631+ auto p = string_split<bool >(argv[i], split_delim);
632+ params.graph_reuse .insert (params.graph_reuse .end (), p.begin (), p.end ());
623633 } else if (arg == " --numa" ) {
624634 if (++i >= argc) {
625635 invalid_param = true ;
@@ -885,6 +895,9 @@ static cmd_params parse_cmd_params(int argc, char ** argv) {
885895 if (params.no_op_offload .empty ()) {
886896 params.no_op_offload = cmd_params_defaults.no_op_offload ;
887897 }
898+ if (params.graph_reuse .empty ()) {
899+ params.graph_reuse = cmd_params_defaults.graph_reuse ;
900+ }
888901 if (params.n_threads .empty ()) {
889902 params.n_threads = cmd_params_defaults.n_threads ;
890903 }
@@ -926,6 +939,7 @@ struct cmd_params_instance {
926939 bool use_mmap;
927940 bool embeddings;
928941 bool no_op_offload;
942+ bool graph_reuse;
929943
930944 llama_model_params to_llama_mparams () const {
931945 llama_model_params mparams = llama_model_default_params ();
@@ -998,6 +1012,7 @@ struct cmd_params_instance {
9981012 cparams.embeddings = embeddings;
9991013 cparams.op_offload = !no_op_offload;
10001014 cparams.swa_full = false ;
1015+ cparams.graph_reuse = graph_reuse;
10011016
10021017 return cparams;
10031018 }
@@ -1018,6 +1033,7 @@ static std::vector<cmd_params_instance> get_cmd_params_instances(const cmd_param
10181033 for (const auto & mmp : params.use_mmap )
10191034 for (const auto & embd : params.embeddings )
10201035 for (const auto & nopo : params.no_op_offload )
1036+ for (const auto & gr : params.graph_reuse )
10211037 for (const auto & nb : params.n_batch )
10221038 for (const auto & nub : params.n_ubatch )
10231039 for (const auto & tk : params.type_k )
@@ -1059,6 +1075,7 @@ static std::vector<cmd_params_instance> get_cmd_params_instances(const cmd_param
10591075 /* .use_mmap = */ mmp,
10601076 /* .embeddings = */ embd,
10611077 /* .no_op_offload= */ nopo,
1078+ /* .graph_reuse = */ gr,
10621079 };
10631080 instances.push_back (instance);
10641081 }
@@ -1092,6 +1109,7 @@ static std::vector<cmd_params_instance> get_cmd_params_instances(const cmd_param
10921109 /* .use_mmap = */ mmp,
10931110 /* .embeddings = */ embd,
10941111 /* .no_op_offload= */ nopo,
1112+ /* .graph_reuse = */ gr,
10951113 };
10961114 instances.push_back (instance);
10971115 }
@@ -1125,6 +1143,7 @@ static std::vector<cmd_params_instance> get_cmd_params_instances(const cmd_param
11251143 /* .use_mmap = */ mmp,
11261144 /* .embeddings = */ embd,
11271145 /* .no_op_offload= */ nopo,
1146+ /* .graph_reuse = */ gr,
11281147 };
11291148 instances.push_back (instance);
11301149 }
@@ -1162,6 +1181,7 @@ struct test {
11621181 bool use_mmap;
11631182 bool embeddings;
11641183 bool no_op_offload;
1184+ bool graph_reuse;
11651185 int n_prompt;
11661186 int n_gen;
11671187 int n_depth;
@@ -1197,6 +1217,7 @@ struct test {
11971217 use_mmap = inst.use_mmap ;
11981218 embeddings = inst.embeddings ;
11991219 no_op_offload = inst.no_op_offload ;
1220+ graph_reuse = inst.graph_reuse ;
12001221 n_prompt = inst.n_prompt ;
12011222 n_gen = inst.n_gen ;
12021223 n_depth = inst.n_depth ;
@@ -1243,8 +1264,8 @@ struct test {
12431264 " cpu_mask" , " cpu_strict" , " poll" , " type_k" , " type_v" , " n_gpu_layers" ,
12441265 " split_mode" , " main_gpu" , " no_kv_offload" , " flash_attn" , " tensor_split" , " tensor_buft_overrides" ,
12451266 " defrag_thold" ,
1246- " use_mmap" , " embeddings" , " no_op_offload" , " n_prompt" , " n_gen" , " n_depth" , " test_time " ,
1247- " avg_ns" , " stddev_ns" , " avg_ts" , " stddev_ts" ,
1267+ " use_mmap" , " embeddings" , " no_op_offload" , " graph_reuse " , " n_prompt" , " n_gen" , " n_depth" ,
1268+ " test_time " , " avg_ns" , " stddev_ns" , " avg_ts" , " stddev_ts" ,
12481269 };
12491270 return fields;
12501271 }
@@ -1259,7 +1280,7 @@ struct test {
12591280 return INT;
12601281 }
12611282 if (field == " f16_kv" || field == " no_kv_offload" || field == " cpu_strict" || field == " flash_attn" ||
1262- field == " use_mmap" || field == " embeddings" ) {
1283+ field == " use_mmap" || field == " embeddings" || field == " graph_reuse " ) {
12631284 return BOOL;
12641285 }
12651286 if (field == " avg_ts" || field == " stddev_ts" || field == " defrag_thold" ) {
@@ -1333,6 +1354,7 @@ struct test {
13331354 std::to_string (use_mmap),
13341355 std::to_string (embeddings),
13351356 std::to_string (no_op_offload),
1357+ std::to_string (graph_reuse),
13361358 std::to_string (n_prompt),
13371359 std::to_string (n_gen),
13381360 std::to_string (n_depth),
@@ -1518,6 +1540,9 @@ struct markdown_printer : public printer {
15181540 if (field == " no_op_offload" ) {
15191541 return 4 ;
15201542 }
1543+ if (field == " graph_reuse" ) {
1544+ return 4 ;
1545+ }
15211546
15221547 int width = std::max ((int ) field.length (), 10 );
15231548
@@ -1552,6 +1577,9 @@ struct markdown_printer : public printer {
15521577 if (field == " no_op_offload" ) {
15531578 return " nopo" ;
15541579 }
1580+ if (field == " graph_reuse" ) {
1581+ return " gr" ;
1582+ }
15551583 if (field == " tensor_split" ) {
15561584 return " ts" ;
15571585 }
@@ -1626,6 +1654,9 @@ struct markdown_printer : public printer {
16261654 if (params.no_op_offload .size () > 1 || params.no_op_offload != cmd_params_defaults.no_op_offload ) {
16271655 fields.emplace_back (" no_op_offload" );
16281656 }
1657+ if (params.graph_reuse .size () > 1 || params.graph_reuse != cmd_params_defaults.graph_reuse ) {
1658+ fields.emplace_back (" graph_reuse" );
1659+ }
16291660 fields.emplace_back (" test" );
16301661 fields.emplace_back (" t/s" );
16311662
0 commit comments