@@ -591,7 +591,7 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
591591 [](common_params & params) {
592592 params.ctx_shift = false ;
593593 }
594- ).set_examples({LLAMA_EXAMPLE_MAIN, LLAMA_EXAMPLE_SERVER}).set_env("LLAMA_ARG_NO_CONTEXT_SHIFT"));
594+ ).set_examples({LLAMA_EXAMPLE_MAIN, LLAMA_EXAMPLE_SERVER, LLAMA_EXAMPLE_IMATRIX}).set_env("LLAMA_ARG_NO_CONTEXT_SHIFT"));
595595 add_opt (common_arg (
596596 {" --chunks" }, " N" ,
597597 string_format (" max number of chunks to process (default: %d, -1 = all)" , params.n_chunks ),
@@ -1711,6 +1711,13 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
17111711 params.public_path = value;
17121712 }
17131713 ).set_examples({LLAMA_EXAMPLE_SERVER}).set_env("LLAMA_ARG_STATIC_PATH"));
1714+ add_opt(common_arg(
1715+ {"--no-webui"},
1716+ string_format("Disable the Web UI (default: %s)", params.webui ? "enabled" : "disabled"),
1717+ [](common_params & params) {
1718+ params.webui = false;
1719+ }
1720+ ).set_examples({LLAMA_EXAMPLE_SERVER}).set_env("LLAMA_ARG_NO_WEBUI"));
17141721 add_opt (common_arg (
17151722 {" --embedding" , " --embeddings" },
17161723 string_format (" restrict to only support embedding use case; use only with dedicated embedding models (default: %s)" , params.embedding ? " enabled" : " disabled" ),
@@ -2076,35 +2083,35 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
20762083 [](common_params & params, int value) {
20772084 params.speculative .n_max = value;
20782085 }
2079- ).set_examples ({LLAMA_EXAMPLE_SPECULATIVE, LLAMA_EXAMPLE_LOOKUP, LLAMA_EXAMPLE_SERVER}));
2086+ ).set_examples({LLAMA_EXAMPLE_SPECULATIVE, LLAMA_EXAMPLE_LOOKUP, LLAMA_EXAMPLE_SERVER}).set_env("LLAMA_ARG_DRAFT_MAX"));
20802087 add_opt (common_arg (
20812088 {" --draft-min" , " --draft-n-min" }, " N" ,
20822089 string_format (" minimum number of draft tokens to use for speculative decoding (default: %d)" , params.speculative .n_min ),
20832090 [](common_params & params, int value) {
20842091 params.speculative .n_min = value;
20852092 }
2086- ).set_examples ({LLAMA_EXAMPLE_SPECULATIVE, LLAMA_EXAMPLE_LOOKUP, LLAMA_EXAMPLE_SERVER}));
2093+ ).set_examples({LLAMA_EXAMPLE_SPECULATIVE, LLAMA_EXAMPLE_LOOKUP, LLAMA_EXAMPLE_SERVER}).set_env("LLAMA_ARG_DRAFT_MIN"));
20872094 add_opt (common_arg (
20882095 {" --draft-p-split" }, " P" ,
20892096 string_format (" speculative decoding split probability (default: %.1f)" , (double )params.speculative .p_split ),
20902097 [](common_params & params, const std::string & value) {
20912098 params.speculative .p_split = std::stof (value);
20922099 }
2093- ).set_examples ({LLAMA_EXAMPLE_SPECULATIVE}));
2100+ ).set_examples({LLAMA_EXAMPLE_SPECULATIVE}).set_env("LLAMA_ARG_DRAFT_P_SPLIT"));
20942101 add_opt (common_arg (
20952102 {" --draft-p-min" }, " P" ,
20962103 string_format (" minimum speculative decoding probability (greedy) (default: %.1f)" , (double )params.speculative .p_min ),
20972104 [](common_params & params, const std::string & value) {
20982105 params.speculative .p_min = std::stof (value);
20992106 }
2100- ).set_examples ({LLAMA_EXAMPLE_SPECULATIVE, LLAMA_EXAMPLE_SERVER}));
2107+ ).set_examples({LLAMA_EXAMPLE_SPECULATIVE, LLAMA_EXAMPLE_SERVER}).set_env("LLAMA_ARG_DRAFT_P_MIN"));
21012108 add_opt (common_arg (
21022109 {" -cd" , " --ctx-size-draft" }, " N" ,
21032110 string_format (" size of the prompt context for the draft model (default: %d, 0 = loaded from model)" , params.speculative .n_ctx ),
21042111 [](common_params & params, int value) {
21052112 params.speculative .n_ctx = value;
21062113 }
2107- ).set_examples ({LLAMA_EXAMPLE_SPECULATIVE, LLAMA_EXAMPLE_SERVER}));
2114+ ).set_examples({LLAMA_EXAMPLE_SPECULATIVE, LLAMA_EXAMPLE_SERVER}).set_env("LLAMA_ARG_CTX_SIZE_DRAFT"));
21082115 add_opt (common_arg (
21092116 {" -devd" , " --device-draft" }, " <dev1,dev2,..>" ,
21102117 " comma-separated list of devices to use for offloading the draft model (none = don't offload)\n "
@@ -2124,14 +2131,14 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
21242131 fprintf (stderr, " warning: consult docs/build.md for compilation instructions\n " );
21252132 }
21262133 }
2127- ).set_examples ({LLAMA_EXAMPLE_SPECULATIVE, LLAMA_EXAMPLE_SERVER}));
2134+ ).set_examples({LLAMA_EXAMPLE_SPECULATIVE, LLAMA_EXAMPLE_SERVER}).set_env("LLAMA_ARG_N_GPU_LAYERS_DRAFT"));
21282135 add_opt (common_arg (
21292136 {" -md" , " --model-draft" }, " FNAME" ,
21302137 " draft model for speculative decoding (default: unused)" ,
21312138 [](common_params & params, const std::string & value) {
21322139 params.speculative .model = value;
21332140 }
2134- ).set_examples ({LLAMA_EXAMPLE_SPECULATIVE, LLAMA_EXAMPLE_SERVER}));
2141+ ).set_examples({LLAMA_EXAMPLE_SPECULATIVE, LLAMA_EXAMPLE_SERVER}).set_env("LLAMA_ARG_MODEL_DRAFT"));
21352142
21362143 return ctx_arg;
21372144}
0 commit comments