@@ -742,6 +742,20 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
             exit(0);
         }
     ));
+    add_opt(common_arg(
+        {"-cl", "--cache-list"},
+        "show list of models in cache",
+        [](common_params &) {
+            printf("model cache directory: %s\n", fs_get_cache_directory().c_str());
+            auto models = common_list_cached_models();
+            printf("number of models in cache: %zu\n", models.size());
+            for (size_t i = 0; i < models.size(); i++) {
+                auto & model = models[i];
+                printf("%4d. %s\n", (int) i + 1, model.to_string().c_str());
+            }
+            exit(0);
+        }
+    ));
     add_opt(common_arg(
         {"--completion-bash"},
         "print source-able bash completion script for llama.cpp",
@@ -2241,6 +2255,13 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
             params.is_pp_shared = true;
         }
     ).set_examples({LLAMA_EXAMPLE_BENCH, LLAMA_EXAMPLE_PARALLEL}));
+    add_opt(common_arg(
+        {"-tgs"},
+        string_format("is the text generation separated across the different sequences (default: %s)", params.is_tg_separate ? "true" : "false"),
+        [](common_params & params) {
+            params.is_tg_separate = true;
+        }
+    ).set_examples({LLAMA_EXAMPLE_BENCH, LLAMA_EXAMPLE_PARALLEL}));
     add_opt(common_arg(
         {"-npp"}, "n0,n1,...",
         "number of prompt tokens",
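For context on the first hunk: the `--cache-list` handler relies on a `common_list_cached_models()` helper whose entries expose `to_string()`. Those definitions are not part of this diff; the sketch below is only a hypothetical illustration of the shape such an interface could take, with every field name assumed rather than taken from the actual sources.

```cpp
#include <string>
#include <vector>

// Hypothetical sketch only: the real cached-model type and
// common_list_cached_models() live elsewhere in common/ and are
// not shown in this diff. All field names below are assumptions.
struct common_cached_model_info {
    std::string user;  // assumed: model owner / namespace
    std::string model; // assumed: model name
    std::string tag;   // assumed: quantization or version tag

    // Assumed format; --cache-list prints one such line per entry.
    std::string to_string() const {
        return user + "/" + model + ":" + tag;
    }
};

// Assumed signature: enumerate the models stored under
// fs_get_cache_directory() and return one entry per cached model.
std::vector<common_cached_model_info> common_list_cached_models();
```

With something of that shape in place, invoking `--cache-list` (or `-cl`) prints the cache directory, the entry count, and one numbered line per cached model, then exits. The `-tgs` flag in the second hunk is simpler: it only sets `params.is_tg_separate` and is registered for the bench and parallel examples, so a plausible (hypothetical) invocation would combine it with the existing batch-size flags, e.g. `llama-batched-bench -m model.gguf -npp 128 -ntg 128 -npl 1,4 -tgs`.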