@@ -1238,6 +1238,7 @@ bool common_params_parse(int argc, char ** argv, common_params & params, llama_e
12381238 common_params_print_completion (ctx_arg);
12391239 exit (0 );
12401240 }
1241+ params.lr .init ();
12411242 } catch (const std::invalid_argument & ex) {
12421243 fprintf (stderr, " %s\n " , ex.what ());
12431244 ctx_arg.params = params_org;
@@ -1506,6 +1507,14 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
15061507 params.swa_full = true ;
15071508 }
15081509 ).set_env (" LLAMA_ARG_SWA_FULL" ));
1510+ add_opt (common_arg (
1511+ {" --swa-checkpoints" }, " N" ,
1512+ string_format (" max number of SWA checkpoints per slot to create (default: %d)\n "
1513+ " [(more info)](https://github.com/ggml-org/llama.cpp/pull/15293)" , params.n_swa_checkpoints ),
1514+ [](common_params & params, int value) {
1515+ params.n_swa_checkpoints = value;
1516+ }
1517+ ).set_env (" LLAMA_ARG_SWA_CHECKPOINTS" ).set_examples ({LLAMA_EXAMPLE_SERVER}));
15091518 add_opt (common_arg (
15101519 {" --kv-unified" , " -kvu" },
15111520 string_format (" use single unified KV buffer for the KV cache of all sequences (default: %s)\n "
@@ -2688,7 +2697,7 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
26882697 [](common_params & params, const std::string & value) {
26892698 params.out_file = value;
26902699 }
2691- ).set_examples ({LLAMA_EXAMPLE_IMATRIX, LLAMA_EXAMPLE_CVECTOR_GENERATOR, LLAMA_EXAMPLE_EXPORT_LORA, LLAMA_EXAMPLE_TTS}));
2700+ ).set_examples ({LLAMA_EXAMPLE_IMATRIX, LLAMA_EXAMPLE_CVECTOR_GENERATOR, LLAMA_EXAMPLE_EXPORT_LORA, LLAMA_EXAMPLE_TTS, LLAMA_EXAMPLE_FINETUNE }));
26922701 add_opt (common_arg (
26932702 {" -ofreq" , " --output-frequency" }, " N" ,
26942703 string_format (" output the imatrix every N iterations (default: %d)" , params.n_out_freq ),
@@ -3566,5 +3575,51 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
35663575 ).set_examples ({ LLAMA_EXAMPLE_DIFFUSION }));
35673576
35683577
3578+ add_opt (
3579+ common_arg ({ " -lr" , " --learning-rate" }, " ALPHA" ,
3580+ string_format (
3581+ " adamw or sgd optimizer alpha (default: %.2g); note: sgd alpha recommended ~10x (no momentum)" ,
3582+ (double ) params.lr .lr0 ),
3583+ [](common_params & params, const std::string & value) { params.lr .lr0 = std::stof (value); })
3584+ .set_examples ({ LLAMA_EXAMPLE_FINETUNE }));
3585+ add_opt (
3586+ common_arg ({ " -lr-min" , " --learning-rate-min" }, " ALPHA" ,
3587+ string_format (
3588+ " (if >0) final learning rate after decay (if -decay-epochs is set, default=%.2g)" ,
3589+ (double ) params.lr .lr_min ),
3590+ [](common_params & params, const std::string & value) { params.lr .lr_min = std::stof (value); })
3591+ .set_examples ({ LLAMA_EXAMPLE_FINETUNE }));
3592+ add_opt (
3593+ common_arg ({ " -decay-epochs" , " --learning-rate-decay-epochs" }, " ALPHA" ,
3594+ string_format (
3595+ " (if >0) decay learning rate to -lr-min after this many epochs (exponential decay, default=%.2g)" ,
3596+ (double ) params.lr .decay_epochs ),
3597+ [](common_params & params, const std::string & value) { params.lr .decay_epochs = std::stof (value); })
3598+ .set_examples ({ LLAMA_EXAMPLE_FINETUNE }));
3599+ add_opt (common_arg (
3600+ { " -wd" , " --weight-decay" }, " WD" ,
3601+ string_format (
3602+ " adamw or sgd optimizer weight decay (0 is off; recommend very small e.g. 1e-9) (default: %.2g)." ,
3603+ (double ) params.lr .wd ),
3604+ [](common_params & params, const std::string & value) { params.lr .wd = std::stof (value); })
3605+ .set_examples ({ LLAMA_EXAMPLE_FINETUNE }));
3606+ add_opt (common_arg ({ " -val-split" , " --val-split" }, " FRACTION" ,
3607+ string_format (" fraction of data to use as validation set for training (default: %.2g)." ,
3608+ (double ) params.val_split ),
3609+ [](common_params & params, const std::string & value) { params.val_split = std::stof (value); })
3610+ .set_examples ({ LLAMA_EXAMPLE_FINETUNE }));
3611+ add_opt (common_arg ({ " -epochs" , " --epochs" }, " N" ,
3612+ string_format (" optimizer max # of epochs (default: %d)" , params.lr .epochs ),
3613+ [](common_params & params, int epochs) { params.lr .epochs = epochs; })
3614+ .set_examples ({ LLAMA_EXAMPLE_FINETUNE }));
3615+ add_opt (common_arg ({ " -opt" , " --optimizer" }, " sgd|adamw" , " adamw or sgd" ,
3616+ [](common_params & params, const std::string & name) {
3617+ params.optimizer = common_opt_get_optimizer (name.c_str ());
3618+ if (params.optimizer == GGML_OPT_OPTIMIZER_TYPE_COUNT) {
3619+ throw std::invalid_argument (" invalid --optimizer, valid options: adamw, sgd" );
3620+ }
3621+ })
3622+ .set_examples ({ LLAMA_EXAMPLE_FINETUNE }));
3623+
35693624 return ctx_arg;
35703625}
0 commit comments