@@ -977,6 +977,10 @@ static bool common_params_parse_ex(int argc, char ** argv, common_params_context
977977 for (auto & seq_breaker : params.sampling .dry_sequence_breakers ) {
978978 string_process_escapes (seq_breaker);
979979 }
980+ for (auto & pair : params.speculative .replacements ) {
981+ string_process_escapes (pair.first );
982+ string_process_escapes (pair.second );
983+ }
980984 }
981985
982986 if (!params.kv_overrides .empty ()) {
@@ -2091,6 +2095,13 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
20912095 params.no_kv_offload = true ;
20922096 }
20932097 ).set_env (" LLAMA_ARG_NO_KV_OFFLOAD" ));
2098+ add_opt (common_arg (
2099+ {" -nr" , " --no-repack" },
2100+ " disable weight repacking" ,
2101+ [](common_params & params) {
2102+ params.no_extra_bufts = true ;
2103+ }
2104+ ).set_env (" LLAMA_ARG_NO_REPACK" ));
20942105 add_opt (common_arg (
20952106 {" -ctk" , " --cache-type-k" }, " TYPE" ,
20962107 string_format (
@@ -3249,6 +3260,13 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
32493260 params.speculative .model .path = value;
32503261 }
32513262 ).set_examples ({LLAMA_EXAMPLE_SPECULATIVE, LLAMA_EXAMPLE_SERVER}).set_env (" LLAMA_ARG_MODEL_DRAFT" ));
3263+ add_opt (common_arg (
3264+ {" --spec-replace" }, " TARGET" , " DRAFT" ,
3265+ " translate the string in TARGET into DRAFT if the draft model and main model are not compatible" ,
3266+ [](common_params & params, const std::string & tgt, const std::string & dft) {
3267+ params.speculative .replacements .push_back ({ tgt, dft });
3268+ }
3269+ ).set_examples ({LLAMA_EXAMPLE_SPECULATIVE, LLAMA_EXAMPLE_SERVER}));
32523270 add_opt (common_arg (
32533271 {" -ctkd" , " --cache-type-k-draft" }, " TYPE" ,
32543272 string_format (
@@ -3438,33 +3456,49 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
34383456 }
34393457 ).set_examples ({LLAMA_EXAMPLE_SERVER}));
34403458
3441- // diffusion parameters
34423459 add_opt (common_arg (
34433460 { " --diffusion-steps" }, " N" ,
34443461 string_format (" number of diffusion steps (default: %d)" , params.diffusion .steps ),
34453462 [](common_params & params, int value) { params.diffusion .steps = value; }
34463463 ).set_examples ({ LLAMA_EXAMPLE_DIFFUSION }));
3464+ add_opt (common_arg (
3465+ { " --diffusion-visual" },
3466+ string_format (" enable visual diffusion mode (show progressive generation) (default: %s)" ,
3467+ params.diffusion .visual_mode ? " true" : " false" ),
3468+ [](common_params & params) { params.diffusion .visual_mode = true ; }
3469+ ).set_examples ({ LLAMA_EXAMPLE_DIFFUSION }));
3470+
34473471 add_opt (common_arg (
34483472 { " --diffusion-eps" }, " F" ,
34493473 string_format (" epsilon for timesteps (default: %.6f)" , (double ) params.diffusion .eps ),
34503474 [](common_params & params, const std::string & value) { params.diffusion .eps = std::stof (value); }
34513475 ).set_examples ({ LLAMA_EXAMPLE_DIFFUSION }));
34523476 add_opt (common_arg (
34533477 { " --diffusion-algorithm" }, " N" ,
3454- string_format (" diffusion algorithm: 0=ORIGIN, 1=MASKGIT_PLUS , 2=TOPK_MARGIN , 3=ENTROPY (default: %d)" ,
3478+ string_format (" diffusion algorithm: 0=ORIGIN, 1=ENTROPY_BASED , 2=MARGIN_BASED , 3=RANDOM, 4=LOW_CONFIDENCE (default: %d)" ,
34553479 params.diffusion .algorithm ),
34563480 [](common_params & params, int value) { params.diffusion .algorithm = value; }
34573481 ).set_examples ({ LLAMA_EXAMPLE_DIFFUSION }));
34583482 add_opt (common_arg (
34593483 { " --diffusion-alg-temp" }, " F" ,
3460- string_format (" algorithm temperature (default: %.3f)" , (double ) params.diffusion .alg_temp ),
3484+ string_format (" dream algorithm temperature (default: %.3f)" , (double ) params.diffusion .alg_temp ),
34613485 [](common_params & params, const std::string & value) { params.diffusion .alg_temp = std::stof (value); }
34623486 ).set_examples ({ LLAMA_EXAMPLE_DIFFUSION }));
3487+
34633488 add_opt (common_arg (
3464- { " --diffusion-visual" },
3465- string_format (" enable visual diffusion mode (show progressive generation) (default: %s)" ,
3466- params.diffusion .visual_mode ? " true" : " false" ),
3467- [](common_params & params) { params.diffusion .visual_mode = true ; }
3489+ { " --diffusion-block-length" }, " N" ,
3490+ string_format (" llada block length for generation (default: %d)" , params.diffusion .block_length ),
3491+ [](common_params & params, int value) { params.diffusion .block_length = value; }
3492+ ).set_examples ({ LLAMA_EXAMPLE_DIFFUSION }));
3493+ add_opt (common_arg (
3494+ { " --diffusion-cfg-scale" }, " F" ,
3495+ string_format (" llada classifier-free guidance scale (default: %.3f)" , (double ) params.diffusion .cfg_scale ),
3496+ [](common_params & params, const std::string & value) { params.diffusion .cfg_scale = std::stof (value); }
3497+ ).set_examples ({ LLAMA_EXAMPLE_DIFFUSION }));
3498+ add_opt (common_arg (
3499+ { " --diffusion-add-gumbel-noise" }, " F" ,
3500+ string_format (" add gumbel noise to the logits if temp > 0.0 (default: %s)" , params.diffusion .add_gumbel_noise ? " true" : " false" ),
3501+ [](common_params & params, const std::string & value) { params.diffusion .add_gumbel_noise = std::stof (value); }
34683502 ).set_examples ({ LLAMA_EXAMPLE_DIFFUSION }));
34693503
34703504 // mmojo-server START
0 commit comments