@@ -157,6 +157,7 @@ static std::string pair_str(const std::pair<int, int> & p) {
 
 struct cmd_params {
     std::vector<std::string>         model;
+    std::vector<std::string>         lora;
     std::vector<int>                 n_prompt;
     std::vector<int>                 n_gen;
     std::vector<std::pair<int, int>> n_pg;
@@ -189,6 +190,7 @@ struct cmd_params {
 
 static const cmd_params cmd_params_defaults = {
     /* model                */ { "models/7B/ggml-model-q4_0.gguf" },
+    /* lora                 */ { "none" },
     /* n_prompt             */ { 512 },
     /* n_gen                */ { 128 },
     /* n_pg                 */ {},
@@ -225,6 +227,7 @@ static void print_usage(int /* argc */, char ** argv) {
     printf("options:\n");
     printf("  -h, --help\n");
     printf("  -m, --model <filename>            (default: %s)\n", join(cmd_params_defaults.model, ",").c_str());
+    printf("  --lora <filename>                 (default: %s)\n", join(cmd_params_defaults.lora, ",").c_str());
     printf("  -p, --n-prompt <n>                (default: %s)\n",
            join(cmd_params_defaults.n_prompt, ",").c_str());
     printf("  -n, --n-gen <n>                   (default: %s)\n", join(cmd_params_defaults.n_gen, ",").c_str());
@@ -341,6 +344,13 @@ static cmd_params parse_cmd_params(int argc, char ** argv) {
            }
            auto p = string_split<std::string>(argv[i], split_delim);
            params.model.insert(params.model.end(), p.begin(), p.end());
+        } else if (arg == "--lora") {
+            if (++i >= argc) {
+                invalid_param = true;
+                break;
+            }
+            auto p = string_split<std::string>(argv[i], split_delim);
+            params.lora.insert(params.lora.end(), p.begin(), p.end());
         } else if (arg == "-p" || arg == "--n-prompt") {
             if (++i >= argc) {
                 invalid_param = true;
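
Like `-m`/`--model`, the new `--lora` flag splits its argument with `string_split` on the same `split_delim`, so several adapter configurations can be swept in a single run; the `"none"` default added above acts as a sentinel so the parameter cross-product is never empty. A hypothetical invocation (paths are placeholders, not from the commit):

```sh
# benchmark the base model both without an adapter and with a hypothetical one
./llama-bench -m models/7B/ggml-model-q4_0.gguf --lora none,adapters/my-lora.gguf
```
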
@@ -606,6 +616,9 @@ static cmd_params parse_cmd_params(int argc, char ** argv) {
     if (params.model.empty()) {
         params.model = cmd_params_defaults.model;
     }
+    if (params.lora.empty()) {
+        params.lora = cmd_params_defaults.lora;
+    }
     if (params.n_prompt.empty()) {
         params.n_prompt = cmd_params_defaults.n_prompt;
     }
@@ -672,6 +685,7 @@ static cmd_params parse_cmd_params(int argc, char ** argv) {
 
 struct cmd_params_instance {
     std::string model;
+    std::string lora;
     int         n_prompt;
     int         n_gen;
     int         n_batch;
@@ -737,7 +751,7 @@ struct cmd_params_instance {
     }
 
     bool equal_mparams(const cmd_params_instance & other) const {
-        return model == other.model && n_gpu_layers == other.n_gpu_layers && rpc_servers_str == other.rpc_servers_str &&
+        return model == other.model && lora == other.lora && n_gpu_layers == other.n_gpu_layers && rpc_servers_str == other.rpc_servers_str &&
                split_mode == other.split_mode && main_gpu == other.main_gpu && use_mmap == other.use_mmap &&
                tensor_split == other.tensor_split;
     }
@@ -764,6 +778,7 @@ static std::vector<cmd_params_instance> get_cmd_params_instances(const cmd_param
     // this ordering minimizes the number of times that each model needs to be reloaded
     // clang-format off
     for (const auto & m : params.model)
+    for (const auto & l : params.lora)
     for (const auto & nl : params.n_gpu_layers)
     for (const auto & rpc : params.rpc_servers)
     for (const auto & sm : params.split_mode)
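
Note where the new loop lands: `lora` iterates directly inside the model loop and outside every context-level parameter, so each (model, adapter) pair is built once and reused for all prompt/generation configurations. Together with the `lora` comparison added to `equal_mparams` above, this preserves the stated goal of minimizing model reloads while still forcing a reload whenever the adapter changes.
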
@@ -787,6 +802,7 @@ static std::vector<cmd_params_instance> get_cmd_params_instances(const cmd_param
             }
             cmd_params_instance instance = {
                 /* .model        = */ m,
+                /* .lora         = */ l,
                 /* .n_prompt     = */ n_prompt,
                 /* .n_gen        = */ 0,
                 /* .n_batch      = */ nb,
@@ -816,6 +832,7 @@ static std::vector<cmd_params_instance> get_cmd_params_instances(const cmd_param
             }
             cmd_params_instance instance = {
                 /* .model        = */ m,
+                /* .lora         = */ l,
                 /* .n_prompt     = */ 0,
                 /* .n_gen        = */ n_gen,
                 /* .n_batch      = */ nb,
@@ -845,6 +862,7 @@ static std::vector<cmd_params_instance> get_cmd_params_instances(const cmd_param
             }
             cmd_params_instance instance = {
                 /* .model        = */ m,
+                /* .lora         = */ l,
                 /* .n_prompt     = */ n_pg.first,
                 /* .n_gen        = */ n_pg.second,
                 /* .n_batch      = */ nb,
@@ -879,6 +897,7 @@ struct test {
     static const std::string cpu_info;
     static const std::string gpu_info;
     std::string              model_filename;
+    std::string              lora_filename;
     std::string              model_type;
     uint64_t                 model_size;
     uint64_t                 model_n_params;
@@ -905,6 +924,7 @@ struct test {
 
     test(const cmd_params_instance & inst, const llama_model * lmodel, const llama_context * ctx) {
         model_filename = inst.model;
+        lora_filename  = inst.lora;
         char buf[128];
         llama_model_desc(lmodel, buf, sizeof(buf));
         model_type = buf;
@@ -966,12 +986,12 @@ struct test {
 
     static const std::vector<std::string> & get_fields() {
         static const std::vector<std::string> fields = {
-            "build_commit", "build_number", "cpu_info",       "gpu_info",       "backends",     "model_filename",
-            "model_type",   "model_size",   "model_n_params", "n_batch",        "n_ubatch",     "n_threads",
-            "cpu_mask",     "cpu_strict",   "poll",           "type_k",         "type_v",       "n_gpu_layers",
-            "split_mode",   "main_gpu",     "no_kv_offload",  "flash_attn",     "tensor_split", "use_mmap",
-            "embeddings",   "n_prompt",     "n_gen",          "test_time",      "avg_ns",       "stddev_ns",
-            "avg_ts",       "stddev_ts",
+            "build_commit",  "build_number", "cpu_info",       "gpu_info",       "backends",     "model_filename",
+            "lora_filename", "model_type",   "model_size",     "model_n_params", "n_batch",      "n_ubatch",
+            "n_threads",     "cpu_mask",     "cpu_strict",     "poll",           "type_k",       "type_v",
+            "n_gpu_layers",  "split_mode",   "main_gpu",       "no_kv_offload",  "flash_attn",   "tensor_split",
+            "use_mmap",      "embeddings",   "n_prompt",       "n_gen",          "test_time",    "avg_ns",
+            "stddev_ns",     "avg_ts",       "stddev_ts",
         };
         return fields;
     }
@@ -1017,6 +1037,7 @@ struct test {
             gpu_info,
             get_backend(),
             model_filename,
+            lora_filename,
             model_type,
             std::to_string(model_size),
             std::to_string(model_n_params),
@@ -1259,6 +1280,9 @@ struct markdown_printer : public printer {
     void print_header(const cmd_params & params) override {
         // select fields to print
         fields.emplace_back("model");
+        if (params.lora.size() > 1 || (!params.lora.empty() && params.lora[0] != "none")) {
+            fields.emplace_back("lora");
+        }
         fields.emplace_back("size");
         fields.emplace_back("params");
         fields.emplace_back("backend");
@@ -1337,6 +1361,8 @@ struct markdown_printer : public printer {
             char buf[128];
             if (field == "model") {
                 value = t.model_type;
+            } else if (field == "lora") {
+                value = t.lora_filename.empty() || t.lora_filename == "none" ? "N" : "Y";
             } else if (field == "size") {
                 if (t.model_size < 1024 * 1024 * 1024) {
                     snprintf(buf, sizeof(buf), "%.2f MiB", t.model_size / 1024.0 / 1024.0);
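
Because `print_header` only adds the `lora` column when something other than `"none"` was requested, existing benchmark tables are unchanged; when present, the column is a simple Y/N flag rather than the adapter path. A hypothetical rendering (values illustrative, not measured):

```
| model         | lora |     size | params | backend | ... |
| llama 7B Q4_0 | N    | 3.56 GiB | 6.74 B | CUDA    | ... |
| llama 7B Q4_0 | Y    | 3.56 GiB | 6.74 B | CUDA    | ... |
```
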
@@ -1561,6 +1587,9 @@ int main(int argc, char ** argv) {
        }
 
        lmodel = llama_model_load_from_file(inst.model.c_str(), inst.to_llama_mparams());
+        if (!inst.lora.empty() && inst.lora != "none") {
+            llama_adapter_lora_init(lmodel, inst.lora.c_str());
+        }
        if (lmodel == NULL) {
            fprintf(stderr, "%s: error: failed to load model '%s'\n", __func__, inst.model.c_str());
            return 1;
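
Two caveats in this last hunk: `llama_adapter_lora_init` runs before the `lmodel == NULL` check, so a failed model load would pass NULL into the adapter API, and both the returned adapter handle and any failure are silently discarded. In the upstream llama.cpp API, initializing an adapter also does not by itself apply it to computation; `llama_set_adapter_lora` attaches it to a context. A minimal sketch of a safer ordering, assuming the current llama.cpp adapter API (not part of the commit):

```cpp
lmodel = llama_model_load_from_file(inst.model.c_str(), inst.to_llama_mparams());
if (lmodel == NULL) {
    fprintf(stderr, "%s: error: failed to load model '%s'\n", __func__, inst.model.c_str());
    return 1;
}
// load the adapter only after the model is known to be valid,
// and keep the handle so it can be attached and error-checked
llama_adapter_lora * adapter = NULL;
if (!inst.lora.empty() && inst.lora != "none") {
    adapter = llama_adapter_lora_init(lmodel, inst.lora.c_str());
    if (adapter == NULL) {
        fprintf(stderr, "%s: error: failed to load lora '%s'\n", __func__, inst.lora.c_str());
        return 1;
    }
}
// ... later, once the llama_context for this instance exists, the adapter
// would still need to be attached for it to affect the measured timings:
// llama_set_adapter_lora(ctx, adapter, 1.0f);
```
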