@@ -3358,7 +3358,7 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
     add_opt(common_arg(
         {"--chat-template-kwargs"}, "STRING",
         string_format("sets additional params for the json template parser"),
-        [](common_params & params, const std::string & value) {
+        [](common_params & params, const std::string & value) {
             auto parsed = json::parse(value);
             for (const auto & item : parsed.items()) {
                 params.default_template_kwargs[item.key()] = item.value().dump();
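The handler above stores each top-level JSON key as its serialized value (`item.value().dump()`), so non-string values such as booleans survive the round trip into the template engine. A minimal standalone sketch of that behavior, assuming the nlohmann::json library that llama.cpp vendors under the `json` alias (the map stands in for `params.default_template_kwargs`):

```cpp
// Sketch of the --chat-template-kwargs parsing logic from the hunk above.
// Assumes nlohmann::json; the map stands in for params.default_template_kwargs.
#include <iostream>
#include <map>
#include <string>

#include <nlohmann/json.hpp>

using json = nlohmann::ordered_json;

int main() {
    std::map<std::string, std::string> default_template_kwargs;

    // e.g. --chat-template-kwargs '{"reasoning_effort": "high", "enable_thinking": false}'
    const std::string value = R"({"reasoning_effort": "high", "enable_thinking": false})";

    auto parsed = json::parse(value);
    for (const auto & item : parsed.items()) {
        // dump() keeps the JSON encoding: strings stay quoted, booleans stay bare
        default_template_kwargs[item.key()] = item.value().dump();
    }

    for (const auto & [k, v] : default_template_kwargs) {
        std::cout << k << " = " << v << "\n";  // enable_thinking = false, reasoning_effort = "high"
    }
    return 0;
}
```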
@@ -3570,21 +3570,23 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
             common_log_set_file(common_log_main(), value.c_str());
         }
     ));
-    add_opt(common_arg({ "--log-colors" }, "[on|off|auto]",
-        "Set colored logging ('on', 'off', or 'auto', default: 'auto')\n"
-        "'auto' enables colors when output is to a terminal",
-        [](common_params &, const std::string & value) {
-            if (is_truthy(value)) {
-                common_log_set_colors(common_log_main(), LOG_COLORS_ENABLED);
-            } else if (is_falsey(value)) {
-                common_log_set_colors(common_log_main(), LOG_COLORS_DISABLED);
-            } else if (is_autoy(value)) {
-                common_log_set_colors(common_log_main(), LOG_COLORS_AUTO);
-            } else {
-                throw std::invalid_argument(
-                    string_format("error: unknown value for --log-colors: '%s'\n", value.c_str()));
-            }
-        }).set_env("LLAMA_LOG_COLORS"));
+    add_opt(common_arg(
+        {"--log-colors"}, "[on|off|auto]",
+        "Set colored logging ('on', 'off', or 'auto', default: 'auto')\n"
+        "'auto' enables colors when output is to a terminal",
+        [](common_params &, const std::string & value) {
+            if (is_truthy(value)) {
+                common_log_set_colors(common_log_main(), LOG_COLORS_ENABLED);
+            } else if (is_falsey(value)) {
+                common_log_set_colors(common_log_main(), LOG_COLORS_DISABLED);
+            } else if (is_autoy(value)) {
+                common_log_set_colors(common_log_main(), LOG_COLORS_AUTO);
+            } else {
+                throw std::invalid_argument(
+                    string_format("error: unknown value for --log-colors: '%s'\n", value.c_str()));
+            }
+        }
+    ).set_env("LLAMA_LOG_COLORS"));
     add_opt(common_arg(
         {"-v", "--verbose", "--log-verbose"},
         "Set verbosity level to infinity (i.e. log all messages, useful for debugging)",
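The `--log-colors` handler leans on three small predicates defined elsewhere in arg.cpp. The sketch below uses hypothetical stand-ins that only assume the documented `on|off|auto` triple plus the usual boolean aliases; the real helpers may accept more (or different) spellings:

```cpp
// Hypothetical stand-ins for the is_truthy/is_falsey/is_autoy predicates used
// by the --log-colors handler above; the accepted spellings are assumptions.
#include <string>

static bool is_truthy(const std::string & value) {
    return value == "on" || value == "true" || value == "1";
}
static bool is_falsey(const std::string & value) {
    return value == "off" || value == "false" || value == "0";
}
static bool is_autoy(const std::string & value) {
    return value == "auto" || value == "-1";
}
```

Unrecognized values fall through to the `std::invalid_argument` branch, so a typo such as `--log-colors always` fails loudly instead of silently changing the color mode.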
@@ -3850,7 +3852,87 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
         }
     ).set_examples({LLAMA_EXAMPLE_TTS}));
 
-    // model-specific
+    add_opt(common_arg(
+        {"--diffusion-steps"}, "N",
+        string_format("number of diffusion steps (default: %d)", params.diffusion.steps),
+        [](common_params & params, int value) { params.diffusion.steps = value; }
+    ).set_examples({ LLAMA_EXAMPLE_DIFFUSION }));
+    add_opt(common_arg(
+        {"--diffusion-visual"},
+        string_format("enable visual diffusion mode (show progressive generation) (default: %s)", params.diffusion.visual_mode ? "true" : "false"),
+        [](common_params & params) { params.diffusion.visual_mode = true; }
+    ).set_examples({ LLAMA_EXAMPLE_DIFFUSION }));
+    add_opt(common_arg(
+        {"--diffusion-eps"}, "F",
+        string_format("epsilon for timesteps (default: %.6f)", (double) params.diffusion.eps),
+        [](common_params & params, const std::string & value) { params.diffusion.eps = std::stof(value); }
+    ).set_examples({ LLAMA_EXAMPLE_DIFFUSION }));
+    add_opt(common_arg(
+        {"--diffusion-algorithm"}, "N",
+        string_format("diffusion algorithm: 0=ORIGIN, 1=ENTROPY_BASED, 2=MARGIN_BASED, 3=RANDOM, 4=LOW_CONFIDENCE (default: %d)", params.diffusion.algorithm),
+        [](common_params & params, int value) { params.diffusion.algorithm = value; }
+    ).set_examples({ LLAMA_EXAMPLE_DIFFUSION }));
+    add_opt(common_arg(
+        {"--diffusion-alg-temp"}, "F",
+        string_format("dream algorithm temperature (default: %.3f)", (double) params.diffusion.alg_temp),
+        [](common_params & params, const std::string & value) { params.diffusion.alg_temp = std::stof(value); }
+    ).set_examples({ LLAMA_EXAMPLE_DIFFUSION }));
+    add_opt(common_arg(
+        {"--diffusion-block-length"}, "N",
+        string_format("llada block length for generation (default: %d)", params.diffusion.block_length),
+        [](common_params & params, int value) { params.diffusion.block_length = value; }
+    ).set_examples({ LLAMA_EXAMPLE_DIFFUSION }));
+    add_opt(common_arg(
+        {"--diffusion-cfg-scale"}, "F",
+        string_format("llada classifier-free guidance scale (default: %.3f)", (double) params.diffusion.cfg_scale),
+        [](common_params & params, const std::string & value) { params.diffusion.cfg_scale = std::stof(value); }
+    ).set_examples({ LLAMA_EXAMPLE_DIFFUSION }));
+    add_opt(common_arg(
+        {"--diffusion-add-gumbel-noise"}, "F",
+        string_format("add gumbel noise to the logits if temp > 0.0 (default: %s)", params.diffusion.add_gumbel_noise ? "true" : "false"),
+        [](common_params & params, const std::string & value) { params.diffusion.add_gumbel_noise = std::stof(value); }
+    ).set_examples({ LLAMA_EXAMPLE_DIFFUSION }));
+    add_opt(common_arg(
+        { "-lr", "--learning-rate" }, "ALPHA",
+        string_format("adamw or sgd optimizer alpha (default: %.2g); note: sgd alpha recommended ~10x (no momentum)", (double) params.lr.lr0),
+        [](common_params & params, const std::string & value) { params.lr.lr0 = std::stof(value); }
+    ).set_examples({ LLAMA_EXAMPLE_FINETUNE }));
+    add_opt(common_arg({ "-lr-min", "--learning-rate-min" }, "ALPHA",
+        string_format("(if >0) final learning rate after decay (if -decay-epochs is set, default=%.2g)",
+            (double) params.lr.lr_min),
+        [](common_params & params, const std::string & value) { params.lr.lr_min = std::stof(value); }
+    ).set_examples({ LLAMA_EXAMPLE_FINETUNE }));
+    add_opt(common_arg(
+        {"-decay-epochs", "--learning-rate-decay-epochs"}, "ALPHA",
+        string_format("(if >0) decay learning rate to -lr-min after this many epochs (exponential decay, default=%.2g)", (double) params.lr.decay_epochs),
+        [](common_params & params, const std::string & value) { params.lr.decay_epochs = std::stof(value); }
+    ).set_examples({ LLAMA_EXAMPLE_FINETUNE }));
+    add_opt(common_arg(
+        {"-wd", "--weight-decay"}, "WD",
+        string_format("adamw or sgd optimizer weight decay (0 is off; recommend very small e.g. 1e-9) (default: %.2g).", (double) params.lr.wd),
+        [](common_params & params, const std::string & value) { params.lr.wd = std::stof(value); }
+    ).set_examples({ LLAMA_EXAMPLE_FINETUNE }));
+    add_opt(common_arg(
+        {"-val-split", "--val-split"}, "FRACTION",
+        string_format("fraction of data to use as validation set for training (default: %.2g).", (double) params.val_split),
+        [](common_params & params, const std::string & value) { params.val_split = std::stof(value); }
+    ).set_examples({ LLAMA_EXAMPLE_FINETUNE }));
+    add_opt(common_arg(
+        {"-epochs", "--epochs"}, "N",
+        string_format("optimizer max # of epochs (default: %d)", params.lr.epochs),
+        [](common_params & params, int epochs) { params.lr.epochs = epochs; }
+    ).set_examples({ LLAMA_EXAMPLE_FINETUNE }));
+    add_opt(common_arg(
+        {"-opt", "--optimizer"}, "sgd|adamw", "adamw or sgd",
+        [](common_params & params, const std::string & name) {
+            params.optimizer = common_opt_get_optimizer(name.c_str());
+            if (params.optimizer == GGML_OPT_OPTIMIZER_TYPE_COUNT) {
+                throw std::invalid_argument("invalid --optimizer, valid options: adamw, sgd");
+            }
+        }
+    ).set_examples({ LLAMA_EXAMPLE_FINETUNE }));
+
+    // presets
     add_opt(common_arg(
         {"--tts-oute-default"},
         string_format("use default OuteTTS models (note: can download weights from the internet)"),
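Taken together, `-lr`, `-lr-min`, and `-decay-epochs` describe an exponential decay schedule for the finetune example. The exact schedule lives in the training code, not in this hunk; the sketch below is one plausible reading, under the assumption that the rate decays geometrically from `lr0` to `lr_min` over `decay_epochs` epochs and then stays flat:

```cpp
// A sketch (not the exact llama.cpp schedule) of how -lr/-lr-min/-decay-epochs
// could combine: exponential decay from lr0 to lr_min over decay_epochs epochs.
#include <algorithm>
#include <cmath>
#include <cstdio>

static double lr_at_epoch(double lr0, double lr_min, double decay_epochs, double epoch) {
    if (decay_epochs <= 0.0 || lr_min <= 0.0 || lr_min >= lr0) {
        return lr0;  // decay disabled or misconfigured: constant learning rate
    }
    const double t = std::min(epoch / decay_epochs, 1.0);  // clamp after the decay window
    return lr0 * std::pow(lr_min / lr0, t);                // geometric interpolation
}

int main() {
    // e.g. -lr 1e-4 -lr-min 1e-5 -decay-epochs 4
    for (int epoch = 0; epoch <= 6; ++epoch) {
        std::printf("epoch %d: lr = %.3g\n", epoch, lr_at_epoch(1e-4, 1e-5, 4.0, epoch));
    }
    return 0;
}
```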
@@ -3863,39 +3945,16 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
38633945 ).set_examples ({LLAMA_EXAMPLE_TTS}));
38643946
38653947 add_opt (common_arg (
3866- {" --embd-bge-small-en-default" },
3867- string_format (" use default bge-small-en-v1.5 model (note: can download weights from the internet)" ),
3868- [](common_params & params) {
3869- params.model .hf_repo = " ggml-org/bge-small-en-v1.5-Q8_0-GGUF" ;
3870- params.model .hf_file = " bge-small-en-v1.5-q8_0.gguf" ;
3871- params.embd_normalize = 2 ;
3872- params.n_ctx = 512 ;
3873- params.verbose_prompt = true ;
3874- params.embedding = true ;
3875- }
3876- ).set_examples ({LLAMA_EXAMPLE_EMBEDDING, LLAMA_EXAMPLE_SERVER}));
3877-
3878- add_opt (common_arg (
3879- {" --embd-e5-small-en-default" },
3880- string_format (" use default e5-small-v2 model (note: can download weights from the internet)" ),
3881- [](common_params & params) {
3882- params.model .hf_repo = " ggml-org/e5-small-v2-Q8_0-GGUF" ;
3883- params.model .hf_file = " e5-small-v2-q8_0.gguf" ;
3884- params.embd_normalize = 2 ;
3885- params.n_ctx = 512 ;
3886- params.verbose_prompt = true ;
3887- params.embedding = true ;
3888- }
3889- ).set_examples ({LLAMA_EXAMPLE_EMBEDDING, LLAMA_EXAMPLE_SERVER}));
3890-
3891- add_opt (common_arg (
3892- {" --embd-gte-small-default" },
3893- string_format (" use default gte-small model (note: can download weights from the internet)" ),
3948+ {" --embd-gemma-default" },
3949+ string_format (" use default EmbeddingGemma model (note: can download weights from the internet)" ),
38943950 [](common_params & params) {
3895- params.model .hf_repo = " ggml-org/gte-small-Q8_0-GGUF" ;
3896- params.model .hf_file = " gte-small-q8_0.gguf" ;
3897- params.embd_normalize = 2 ;
3898- params.n_ctx = 512 ;
3951+ params.model .hf_repo = " ggml-org/embeddinggemma-300M-qat-q4_0-GGUF" ;
3952+ params.model .hf_file = " embeddinggemma-300M-qat-Q4_0.gguf" ;
3953+ params.port = 8011 ;
3954+ params.n_ubatch = 2048 ;
3955+ params.n_batch = 2048 ;
3956+ params.n_parallel = 32 ;
3957+ params.n_ctx = 2048 *params.n_parallel ;
38993958 params.verbose_prompt = true ;
39003959 params.embedding = true ;
39013960 }
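Worth noting in the preset above: `n_ctx = 2048*params.n_parallel` with `n_parallel = 32` yields a total context of 2048 × 32 = 65,536 tokens, i.e. 2,048 tokens per server slot. Since `n_ubatch` is also 2048, a full-length prompt for one slot presumably fits in a single micro-batch, which suits short embedding inputs served at high concurrency.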
@@ -3990,96 +4049,65 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
     ).set_examples({LLAMA_EXAMPLE_SERVER}));
 
     add_opt(common_arg(
-        { "--diffusion-steps" }, "N",
-        string_format("number of diffusion steps (default: %d)", params.diffusion.steps),
-        [](common_params & params, int value) { params.diffusion.steps = value; }
-    ).set_examples({ LLAMA_EXAMPLE_DIFFUSION }));
-    add_opt(common_arg(
-        { "--diffusion-visual" },
-        string_format("enable visual diffusion mode (show progressive generation) (default: %s)",
-                      params.diffusion.visual_mode ? "true" : "false"),
-        [](common_params & params) { params.diffusion.visual_mode = true; }
-    ).set_examples({ LLAMA_EXAMPLE_DIFFUSION }));
+        {"--gpt-oss-20b-default"},
+        string_format("use gpt-oss-20b (note: can download weights from the internet)"),
+        [](common_params & params) {
+            params.model.hf_repo = "ggml-org/gpt-oss-20b-GGUF";
+            params.model.hf_file = "gpt-oss-20b-mxfp4.gguf";
+            params.port = 8013;
+            params.n_ubatch = 2048;
+            params.n_batch = 32768;
+            params.n_parallel = 2;
+            params.n_ctx = 131072*params.n_parallel;
+            params.sampling.temp = 1.0f;
+            params.sampling.top_p = 1.0f;
+            params.sampling.top_k = 0;
+            params.sampling.min_p = 0.01f;
+            params.use_jinja = true;
+            // params.default_template_kwargs["reasoning_effort"] = "\"high\"";
+        }
+    ).set_examples({LLAMA_EXAMPLE_SERVER}));
 
     add_opt(common_arg(
-        { "--diffusion-eps" }, "F",
-        string_format("epsilon for timesteps (default: %.6f)", (double) params.diffusion.eps),
-        [](common_params & params, const std::string & value) { params.diffusion.eps = std::stof(value); }
-    ).set_examples({ LLAMA_EXAMPLE_DIFFUSION }));
-    add_opt(common_arg(
-        { "--diffusion-algorithm" }, "N",
-        string_format("diffusion algorithm: 0=ORIGIN, 1=ENTROPY_BASED, 2=MARGIN_BASED, 3=RANDOM, 4=LOW_CONFIDENCE (default: %d)",
-                      params.diffusion.algorithm),
-        [](common_params & params, int value) { params.diffusion.algorithm = value; }
-    ).set_examples({ LLAMA_EXAMPLE_DIFFUSION }));
-    add_opt(common_arg(
-        { "--diffusion-alg-temp" }, "F",
-        string_format("dream algorithm temperature (default: %.3f)", (double) params.diffusion.alg_temp),
-        [](common_params & params, const std::string & value) { params.diffusion.alg_temp = std::stof(value); }
-    ).set_examples({ LLAMA_EXAMPLE_DIFFUSION }));
+        {"--gpt-oss-120b-default"},
+        string_format("use gpt-oss-120b (note: can download weights from the internet)"),
+        [](common_params & params) {
+            params.model.hf_repo = "ggml-org/gpt-oss-120b-GGUF";
+            params.port = 8013;
+            params.n_ubatch = 2048;
+            params.n_batch = 32768;
+            params.n_parallel = 2;
+            params.n_ctx = 131072*params.n_parallel;
+            params.sampling.temp = 1.0f;
+            params.sampling.top_p = 1.0f;
+            params.sampling.top_k = 0;
+            params.sampling.min_p = 0.01f;
+            params.use_jinja = true;
+            // params.default_template_kwargs["reasoning_effort"] = "\"high\"";
+        }
+    ).set_examples({LLAMA_EXAMPLE_SERVER}));
 
     add_opt(common_arg(
-        { "--diffusion-block-length" }, "N",
-        string_format("llada block length for generation (default: %d)", params.diffusion.block_length),
-        [](common_params & params, int value) { params.diffusion.block_length = value; }
-    ).set_examples({ LLAMA_EXAMPLE_DIFFUSION }));
-    add_opt(common_arg(
-        { "--diffusion-cfg-scale" }, "F",
-        string_format("llada classifier-free guidance scale (default: %.3f)", (double) params.diffusion.cfg_scale),
-        [](common_params & params, const std::string & value) { params.diffusion.cfg_scale = std::stof(value); }
-    ).set_examples({ LLAMA_EXAMPLE_DIFFUSION }));
-    add_opt(common_arg(
-        { "--diffusion-add-gumbel-noise" }, "F",
-        string_format("add gumbel noise to the logits if temp > 0.0 (default: %s)", params.diffusion.add_gumbel_noise ? "true" : "false"),
-        [](common_params & params, const std::string & value) { params.diffusion.add_gumbel_noise = std::stof(value); }
-    ).set_examples({ LLAMA_EXAMPLE_DIFFUSION }));
-
+        {"--vision-gemma-4b-default"},
+        string_format("use Gemma 3 4B QAT (note: can download weights from the internet)"),
+        [](common_params & params) {
+            params.model.hf_repo = "ggml-org/gemma-3-4b-it-qat-GGUF";
+            params.port = 8014;
+            params.n_ctx = 0;
+            params.use_jinja = true;
+        }
+    ).set_examples({LLAMA_EXAMPLE_SERVER}));
 
-    add_opt(
-        common_arg({ "-lr", "--learning-rate" }, "ALPHA",
-                   string_format(
-                       "adamw or sgd optimizer alpha (default: %.2g); note: sgd alpha recommended ~10x (no momentum)",
-                       (double) params.lr.lr0),
-                   [](common_params & params, const std::string & value) { params.lr.lr0 = std::stof(value); })
-            .set_examples({ LLAMA_EXAMPLE_FINETUNE }));
-    add_opt(
-        common_arg({ "-lr-min", "--learning-rate-min" }, "ALPHA",
-                   string_format(
-                       "(if >0) final learning rate after decay (if -decay-epochs is set, default=%.2g)",
-                       (double) params.lr.lr_min),
-                   [](common_params & params, const std::string & value) { params.lr.lr_min = std::stof(value); })
-            .set_examples({ LLAMA_EXAMPLE_FINETUNE }));
-    add_opt(
-        common_arg({ "-decay-epochs", "--learning-rate-decay-epochs" }, "ALPHA",
-                   string_format(
-                       "(if >0) decay learning rate to -lr-min after this many epochs (exponential decay, default=%.2g)",
-                       (double) params.lr.decay_epochs),
-                   [](common_params & params, const std::string & value) { params.lr.decay_epochs = std::stof(value); })
-            .set_examples({ LLAMA_EXAMPLE_FINETUNE }));
-    add_opt(common_arg(
-        { "-wd", "--weight-decay" }, "WD",
-        string_format(
-            "adamw or sgd optimizer weight decay (0 is off; recommend very small e.g. 1e-9) (default: %.2g).",
-            (double) params.lr.wd),
-        [](common_params & params, const std::string & value) { params.lr.wd = std::stof(value); })
-        .set_examples({ LLAMA_EXAMPLE_FINETUNE }));
-    add_opt(common_arg({ "-val-split", "--val-split" }, "FRACTION",
-        string_format("fraction of data to use as validation set for training (default: %.2g).",
-            (double) params.val_split),
-        [](common_params & params, const std::string & value) { params.val_split = std::stof(value); })
-        .set_examples({ LLAMA_EXAMPLE_FINETUNE }));
-    add_opt(common_arg({ "-epochs", "--epochs" }, "N",
-        string_format("optimizer max # of epochs (default: %d)", params.lr.epochs),
-        [](common_params & params, int epochs) { params.lr.epochs = epochs; })
-        .set_examples({ LLAMA_EXAMPLE_FINETUNE }));
-    add_opt(common_arg({ "-opt", "--optimizer" }, "sgd|adamw", "adamw or sgd",
-        [](common_params & params, const std::string & name) {
-            params.optimizer = common_opt_get_optimizer(name.c_str());
-            if (params.optimizer == GGML_OPT_OPTIMIZER_TYPE_COUNT) {
-                throw std::invalid_argument("invalid --optimizer, valid options: adamw, sgd");
-            }
-        })
-        .set_examples({ LLAMA_EXAMPLE_FINETUNE }));
+    add_opt(common_arg(
+        {"--vision-gemma-12b-default"},
+        string_format("use Gemma 3 12B QAT (note: can download weights from the internet)"),
+        [](common_params & params) {
+            params.model.hf_repo = "ggml-org/gemma-3-12b-it-qat-GGUF";
+            params.port = 8014;
+            params.n_ctx = 0;
+            params.use_jinja = true;
+        }
+    ).set_examples({LLAMA_EXAMPLE_SERVER}));
 
     return ctx_arg;
 }
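Two observations on the gpt-oss presets above. First, both ship with the `reasoning_effort` template kwarg commented out, so the chat template's own default applies; since the first hunk adds `--chat-template-kwargs`, the same setting can presumably be supplied at run time without editing the preset, e.g. `llama-server --gpt-oss-20b-default --chat-template-kwargs '{"reasoning_effort": "high"}'`. Second, `n_ctx = 131072*params.n_parallel` with `n_parallel = 2` allocates 131,072 × 2 = 262,144 tokens of KV context in total, i.e. a full 131,072-token window for each of the two server slots.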