@@ -977,6 +977,10 @@ static bool common_params_parse_ex(int argc, char ** argv, common_params_context
         for (auto & seq_breaker : params.sampling.dry_sequence_breakers) {
             string_process_escapes(seq_breaker);
         }
+        for (auto & pair : params.speculative.replacements) {
+            string_process_escapes(pair.first);
+            string_process_escapes(pair.second);
+        }
     }
 
     if (!params.kv_overrides.empty()) {
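The new loop extends the existing escape handling to the `--spec-replace` pairs, so sequences like `\n` typed on the command line become real characters in both the target and draft strings. A minimal sketch of that idea, using a hypothetical `unescape()` stand-in that only covers `\n`, `\t`, and `\\` (the real `string_process_escapes()` handles more cases):

```cpp
#include <cstdio>
#include <string>
#include <utility>
#include <vector>

// Hypothetical stand-in for string_process_escapes(): rewrites the string in
// place, turning the two-character sequences \n, \t and \\ into real characters.
static void unescape(std::string & s) {
    std::string out;
    for (size_t i = 0; i < s.size(); ++i) {
        if (s[i] == '\\' && i + 1 < s.size()) {
            char c = s[++i];
            out += (c == 'n') ? '\n' : (c == 't') ? '\t' : c;
        } else {
            out += s[i];
        }
    }
    s = std::move(out);
}

int main() {
    // Pairs as they arrive from the command line: escapes are still literal text.
    std::vector<std::pair<std::string, std::string>> replacements = {
        {"<|target_eot|>\\n", "<|draft_eot|>\\n"},
    };
    for (auto & pair : replacements) {   // same shape as the new loop in the diff
        unescape(pair.first);
        unescape(pair.second);
    }
    printf("first : %s", replacements[0].first.c_str());
    printf("second: %s", replacements[0].second.c_str());
}
```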
@@ -2091,6 +2095,13 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
             params.no_kv_offload = true;
         }
     ).set_env("LLAMA_ARG_NO_KV_OFFLOAD"));
+    add_opt(common_arg(
+        {"-nr", "--no-repack"},
+        "disable weight repacking",
+        [](common_params & params) {
+            params.no_extra_bufts = true;
+        }
+    ).set_env("LLAMA_ARG_NO_REPACK"));
     add_opt(common_arg(
         {"-ctk", "--cache-type-k"}, "TYPE",
         string_format(
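`-nr`/`--no-repack` follows the usual pattern for value-less options: the `common_arg` carries a handler lambda that simply flips a field on `common_params` when the flag is seen. A stripped-down model of that dispatch (the `my_flag`/`my_params` types here are illustrative, not the real `common_arg` API):

```cpp
#include <cstdio>
#include <functional>
#include <string>
#include <vector>

// Stripped-down model of the option pattern used in the diff: a flag with no
// value argument carries a handler that mutates the params struct.
struct my_params { bool no_extra_bufts = false; };

struct my_flag {
    std::vector<std::string>         names;    // e.g. {"-nr", "--no-repack"}
    std::string                      help;
    std::function<void(my_params &)> handler;  // called when the flag is seen
};

int main(int argc, char ** argv) {
    my_params params;
    std::vector<my_flag> flags = {
        {{"-nr", "--no-repack"}, "disable weight repacking",
         [](my_params & p) { p.no_extra_bufts = true; }},
    };
    for (int i = 1; i < argc; ++i) {
        for (const auto & f : flags) {
            for (const auto & name : f.names) {
                if (name == argv[i]) { f.handler(params); }
            }
        }
    }
    printf("no_extra_bufts = %s\n", params.no_extra_bufts ? "true" : "false");
}
```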
@@ -2369,6 +2380,15 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
             }
         }
     ));
+    add_opt(common_arg(
+        {"--cpu-moe"},
+        "use CPU for Mixture of Experts (MoE) weights",
+        [](common_params & params) {
+            params.tensor_buft_overrides.push_back({"\\.ffn_up_exps\\.weight$",   ggml_backend_cpu_buffer_type()});
+            params.tensor_buft_overrides.push_back({"\\.ffn_down_exps\\.weight$", ggml_backend_cpu_buffer_type()});
+            params.tensor_buft_overrides.push_back({"\\.ffn_gate_exps\\.weight$", ggml_backend_cpu_buffer_type()});
+        }
+    ).set_env("LLAMA_ARG_CPU_MOE"));
     add_opt(common_arg(
         {"-ngl", "--gpu-layers", "--n-gpu-layers"}, "N",
         "number of layers to store in VRAM",
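`--cpu-moe` is effectively shorthand for three tensor buffer-type overrides that keep the MoE expert FFN weights in host memory. Assuming the patterns are matched as ECMAScript regexes against tensor names, as with `--override-tensor` rules, this sketch shows which names they catch once the C++ string escapes are resolved:

```cpp
#include <cstdio>
#include <regex>
#include <string>
#include <vector>

int main() {
    // The three patterns pushed by --cpu-moe (C++ string literals unescaped:
    // "\\.ffn_up_exps\\.weight$" is the regex \.ffn_up_exps\.weight$).
    const std::vector<std::string> patterns = {
        R"(\.ffn_up_exps\.weight$)",
        R"(\.ffn_down_exps\.weight$)",
        R"(\.ffn_gate_exps\.weight$)",
    };
    // A few representative tensor names from a MoE model.
    const std::vector<std::string> tensors = {
        "blk.0.ffn_up_exps.weight",    // expert FFN -> matches
        "blk.0.ffn_gate_exps.weight",  // expert FFN -> matches
        "blk.0.ffn_up.weight",         // dense FFN  -> no match, normal placement
        "blk.0.attn_q.weight",         // attention  -> no match
    };
    for (const auto & name : tensors) {
        bool cpu = false;
        for (const auto & p : patterns) {
            if (std::regex_search(name, std::regex(p))) { cpu = true; break; }
        }
        printf("%-28s -> %s\n", name.c_str(), cpu ? "CPU buffer" : "default buffer");
    }
}
```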
@@ -3249,6 +3269,13 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
             params.speculative.model.path = value;
         }
     ).set_examples({LLAMA_EXAMPLE_SPECULATIVE, LLAMA_EXAMPLE_SERVER}).set_env("LLAMA_ARG_MODEL_DRAFT"));
+    add_opt(common_arg(
+        {"--spec-replace"}, "TARGET", "DRAFT",
+        "translate the string in TARGET into DRAFT if the draft model and main model are not compatible",
+        [](common_params & params, const std::string & tgt, const std::string & dft) {
+            params.speculative.replacements.push_back({ tgt, dft });
+        }
+    ).set_examples({LLAMA_EXAMPLE_SPECULATIVE, LLAMA_EXAMPLE_SERVER}));
     add_opt(common_arg(
         {"-ctkd", "--cache-type-k-draft"}, "TYPE",
         string_format(
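`--spec-replace` only collects TARGET→DRAFT pairs here; the translation itself happens later in the speculative-decoding path. As an illustration of how such an ordered replacement list could be applied to a piece of text, here is a self-contained sketch (the `apply_replacements()` helper is hypothetical, not the real API):

```cpp
#include <cstdio>
#include <string>
#include <utility>
#include <vector>

// Illustrative helper: apply every {target, draft} pair in order, replacing
// all occurrences of the target substring with the draft one.
static std::string apply_replacements(
        std::string text,
        const std::vector<std::pair<std::string, std::string>> & replacements) {
    for (const auto & [tgt, dft] : replacements) {
        if (tgt.empty()) {
            continue;
        }
        size_t pos = 0;
        while ((pos = text.find(tgt, pos)) != std::string::npos) {
            text.replace(pos, tgt.size(), dft);
            pos += dft.size();
        }
    }
    return text;
}

int main() {
    // As if the user passed: --spec-replace "<|im_end|>" "</s>"
    std::vector<std::pair<std::string, std::string>> replacements = {
        {"<|im_end|>", "</s>"},
    };
    std::string target_text = "Hello!<|im_end|>";
    printf("%s\n", apply_replacements(target_text, replacements).c_str());
}
```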
@@ -3438,34 +3465,51 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
         }
     ).set_examples({LLAMA_EXAMPLE_SERVER}));
 
-    // diffusion parameters
     add_opt(common_arg(
         { "--diffusion-steps" }, "N",
         string_format("number of diffusion steps (default: %d)", params.diffusion.steps),
         [](common_params & params, int value) { params.diffusion.steps = value; }
     ).set_examples({ LLAMA_EXAMPLE_DIFFUSION }));
+    add_opt(common_arg(
+        { "--diffusion-visual" },
+        string_format("enable visual diffusion mode (show progressive generation) (default: %s)",
+                      params.diffusion.visual_mode ? "true" : "false"),
+        [](common_params & params) { params.diffusion.visual_mode = true; }
+    ).set_examples({ LLAMA_EXAMPLE_DIFFUSION }));
+
     add_opt(common_arg(
         { "--diffusion-eps" }, "F",
         string_format("epsilon for timesteps (default: %.6f)", (double) params.diffusion.eps),
         [](common_params & params, const std::string & value) { params.diffusion.eps = std::stof(value); }
     ).set_examples({ LLAMA_EXAMPLE_DIFFUSION }));
     add_opt(common_arg(
         { "--diffusion-algorithm" }, "N",
-        string_format("diffusion algorithm: 0=ORIGIN, 1=MASKGIT_PLUS, 2=TOPK_MARGIN, 3=ENTROPY (default: %d)",
+        string_format("diffusion algorithm: 0=ORIGIN, 1=ENTROPY_BASED, 2=MARGIN_BASED, 3=RANDOM, 4=LOW_CONFIDENCE (default: %d)",
                       params.diffusion.algorithm),
         [](common_params & params, int value) { params.diffusion.algorithm = value; }
     ).set_examples({ LLAMA_EXAMPLE_DIFFUSION }));
     add_opt(common_arg(
         { "--diffusion-alg-temp" }, "F",
-        string_format("algorithm temperature (default: %.3f)", (double) params.diffusion.alg_temp),
+        string_format("dream algorithm temperature (default: %.3f)", (double) params.diffusion.alg_temp),
         [](common_params & params, const std::string & value) { params.diffusion.alg_temp = std::stof(value); }
     ).set_examples({ LLAMA_EXAMPLE_DIFFUSION }));
+
     add_opt(common_arg(
-        { "--diffusion-visual" },
-        string_format("enable visual diffusion mode (show progressive generation) (default: %s)",
-                      params.diffusion.visual_mode ? "true" : "false"),
-        [](common_params & params) { params.diffusion.visual_mode = true; }
+        { "--diffusion-block-length" }, "N",
+        string_format("llada block length for generation (default: %d)", params.diffusion.block_length),
+        [](common_params & params, int value) { params.diffusion.block_length = value; }
+    ).set_examples({ LLAMA_EXAMPLE_DIFFUSION }));
+    add_opt(common_arg(
+        { "--diffusion-cfg-scale" }, "F",
+        string_format("llada classifier-free guidance scale (default: %.3f)", (double) params.diffusion.cfg_scale),
+        [](common_params & params, const std::string & value) { params.diffusion.cfg_scale = std::stof(value); }
     ).set_examples({ LLAMA_EXAMPLE_DIFFUSION }));
+    add_opt(common_arg(
+        { "--diffusion-add-gumbel-noise" }, "F",
+        string_format("add gumbel noise to the logits if temp > 0.0 (default: %s)", params.diffusion.add_gumbel_noise ? "true" : "false"),
+        [](common_params & params, const std::string & value) { params.diffusion.add_gumbel_noise = std::stof(value); }
+    ).set_examples({ LLAMA_EXAMPLE_DIFFUSION }));
+
 
     return ctx_arg;
 }
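One detail worth noting: `--diffusion-add-gumbel-noise` is declared with an `F` value rather than as a bare switch, and its handler assigns the result of `std::stof()` to a boolean field, so any non-zero value enables the noise. A tiny standalone check of that float-to-bool narrowing:

```cpp
#include <cstdio>
#include <initializer_list>
#include <string>

int main() {
    bool add_gumbel_noise = false;

    // Mirrors the option handler in the diff: the argument string is parsed as
    // a float and the implicit float -> bool conversion makes any non-zero
    // value count as "enabled".
    for (const char * value : {"0.0", "1.0", "0.5"}) {
        add_gumbel_noise = std::stof(value);
        printf("--diffusion-add-gumbel-noise %s -> %s\n",
               value, add_gumbel_noise ? "true" : "false");
    }
    return 0;
}
```

Passing `--diffusion-add-gumbel-noise 0.0` therefore leaves the feature disabled, which matches the `true`/`false` default printed by the help text.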