@@ -133,7 +133,8 @@ static void common_params_handle_model_default(
         const std::string & model_url,
         std::string & hf_repo,
         std::string & hf_file,
-        const std::string & hf_token) {
+        const std::string & hf_token,
+        const std::string & model_default) {
     if (!hf_repo.empty()) {
         // short-hand to avoid specifying --hf-file -> default it to --model
         if (hf_file.empty()) {
@@ -163,7 +164,7 @@ static void common_params_handle_model_default(
             model = fs_get_cache_file(string_split<std::string>(f, '/').back());
         }
     } else if (model.empty()) {
-        model = DEFAULT_MODEL_PATH;
+        model = model_default;
     }
 }

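For reference, a minimal sketch of the resolution order this hunk implements (a simplified standalone version, not the verbatim llama.cpp code; the cache-filename derivation from the repo/URL is elided):

```cpp
#include <string>

// Sketch: an explicit --model path always wins; a Hugging Face repo or
// model URL comes next; only then does the per-call model_default apply.
static std::string resolve_model_path(
        const std::string & model,         // --model
        const std::string & hf_repo,       // --hf-repo
        const std::string & model_url,     // --model-url
        const std::string & model_default) {
    if (!model.empty()) {
        return model;                      // explicit path always wins
    }
    if (!hf_repo.empty() || !model_url.empty()) {
        return "<cache file derived from repo/url>"; // elided here
    }
    return model_default;                  // "" => model stays unset
}
```

Making the fallback a parameter is what lets the draft and vocoder models below default to "" (no model) instead of the global DEFAULT_MODEL_PATH.
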
@@ -299,8 +300,9 @@ static bool common_params_parse_ex(int argc, char ** argv, common_params_context
     }

     // TODO: refactor model params in a common struct
-    common_params_handle_model_default(params.model, params.model_url, params.hf_repo, params.hf_file, params.hf_token);
-    common_params_handle_model_default(params.vocoder.model, params.vocoder.model_url, params.vocoder.hf_repo, params.vocoder.hf_file, params.hf_token);
+    common_params_handle_model_default(params.model, params.model_url, params.hf_repo, params.hf_file, params.hf_token, DEFAULT_MODEL_PATH);
+    common_params_handle_model_default(params.speculative.model, params.speculative.model_url, params.speculative.hf_repo, params.speculative.hf_file, params.hf_token, "");
+    common_params_handle_model_default(params.vocoder.model, params.vocoder.model_url, params.vocoder.hf_repo, params.vocoder.hf_file, params.hf_token, "");

     if (params.escape) {
         string_process_escapes(params.prompt);
@@ -323,6 +325,14 @@ static bool common_params_parse_ex(int argc, char ** argv, common_params_context
         throw std::invalid_argument("error: either --embedding or --reranking can be specified, but not both");
     }

+    if (!params.chat_template.empty() && !common_chat_verify_template(params.chat_template, params.use_jinja)) {
+        throw std::runtime_error(string_format(
+            "error: the supplied chat template is not supported: %s%s\n",
+            params.chat_template.c_str(),
+            params.use_jinja ? "" : "\nnote: llama.cpp was started without --jinja, we only support commonly used templates"
+        ));
+    }
+
     return true;
 }

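Moving this check out of the --chat-template handler (see the removal further down) and into common_params_parse_ex makes validation order-independent: all options are recorded first and cross-option checks run once afterwards, so --jinja takes effect regardless of where it appears on the command line. A toy version of that pattern, with verify_template as a hypothetical stand-in for common_chat_verify_template:

```cpp
#include <stdexcept>
#include <string>

struct toy_params {
    std::string chat_template;
    bool        use_jinja = false;
};

// Hypothetical stand-in: pretend only the jinja engine accepts custom templates.
static bool verify_template(const std::string & /*tmpl*/, bool use_jinja) {
    return use_jinja;
}

// Runs after *all* flags are parsed, so "--chat-template X --jinja" and
// "--jinja --chat-template X" behave identically.
static void validate(const toy_params & p) {
    if (!p.chat_template.empty() && !verify_template(p.chat_template, p.use_jinja)) {
        throw std::runtime_error("error: the supplied chat template is not supported");
    }
}
```
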
@@ -1629,6 +1639,13 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
             params.hf_repo = value;
         }
     ).set_env("LLAMA_ARG_HF_REPO"));
+    add_opt(common_arg(
+        {"-hfd", "-hfrd", "--hf-repo-draft"}, "<user>/<model>[:quant]",
+        "Same as --hf-repo, but for the draft model (default: unused)",
+        [](common_params & params, const std::string & value) {
+            params.speculative.hf_repo = value;
+        }
+    ).set_env("LLAMA_ARG_HFD_REPO"));
     add_opt(common_arg(
         {"-hff", "--hf-file"}, "FILE",
         "Hugging Face model file. If specified, it will override the quant in --hf-repo (default: unused)",
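The handler only stores the repo string; combined with the empty model_default passed for params.speculative earlier, the draft model stays strictly opt-in. A hypothetical illustration of how downstream code can key off that (field name taken from this diff):

```cpp
#include <string>

// Hypothetical sketch: an unset -hfd/--hf-repo-draft leaves the resolved
// draft model path empty, which callers can read as "no speculative decoding".
struct speculative_params_view {
    std::string model; // filled in by common_params_handle_model_default
};

static bool draft_model_requested(const speculative_params_view & sp) {
    return !sp.model.empty();
}
```
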
@@ -1938,24 +1955,44 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
             }
         }
     ).set_examples({LLAMA_EXAMPLE_SERVER}));
+    add_opt(common_arg(
+        {"--jinja"},
+        "use jinja template for chat (default: disabled)",
+        [](common_params & params) {
+            params.use_jinja = true;
+        }
+    ).set_examples({LLAMA_EXAMPLE_SERVER, LLAMA_EXAMPLE_MAIN}).set_env("LLAMA_ARG_JINJA"));
     add_opt(common_arg(
         {"--chat-template"}, "JINJA_TEMPLATE",
         string_format(
             "set custom jinja chat template (default: template taken from model's metadata)\n"
             "if suffix/prefix are specified, template will be disabled\n"
+            "only commonly used templates are accepted (unless --jinja is set before this flag):\n"
             "list of built-in templates:\n%s", list_builtin_chat_templates().c_str()
         ),
         [](common_params & params, const std::string & value) {
-            if (!common_chat_verify_template(value)) {
-                throw std::runtime_error(string_format(
-                    "error: the supplied chat template is not supported: %s\n"
-                    "note: llama.cpp does not use jinja parser, we only support commonly used templates\n",
-                    value.c_str()
-                ));
-            }
             params.chat_template = value;
         }
     ).set_examples({LLAMA_EXAMPLE_MAIN, LLAMA_EXAMPLE_SERVER}).set_env("LLAMA_ARG_CHAT_TEMPLATE"));
+    add_opt(common_arg(
+        {"--chat-template-file"}, "JINJA_TEMPLATE_FILE",
+        string_format(
+            "set custom jinja chat template file (default: template taken from model's metadata)\n"
+            "if suffix/prefix are specified, template will be disabled\n"
+            "only commonly used templates are accepted (unless --jinja is set before this flag):\n"
+            "list of built-in templates:\n%s", list_builtin_chat_templates().c_str()
+        ),
+        [](common_params & params, const std::string & value) {
+            std::ifstream file(value);
+            if (!file) {
+                throw std::runtime_error(string_format("error: failed to open file '%s'\n", value.c_str()));
+            }
+            std::copy(
+                std::istreambuf_iterator<char>(file),
+                std::istreambuf_iterator<char>(),
+                std::back_inserter(params.chat_template));
+        }
+    ).set_examples({LLAMA_EXAMPLE_MAIN, LLAMA_EXAMPLE_SERVER}).set_env("LLAMA_ARG_CHAT_TEMPLATE_FILE"));
     add_opt(common_arg(
         {"-sps", "--slot-prompt-similarity"}, "SIMILARITY",
         string_format("how much the prompt of a request must match the prompt of a slot in order to use that slot (default: %.2f, 0.0 = disabled)\n", params.slot_prompt_similarity),
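The --chat-template-file handler slurps the whole file through stream iterators (appending to params.chat_template, which is equivalent to assignment when the flag is given once). The same idiom as a standalone helper, assuming the surrounding file already pulls in <fstream> and <iterator>:

```cpp
#include <fstream>
#include <iterator>
#include <stdexcept>
#include <string>

// Standalone version of the loading logic above: read an entire file
// into a string, throwing if it cannot be opened.
static std::string read_template_file(const std::string & path) {
    std::ifstream file(path);
    if (!file) {
        throw std::runtime_error("error: failed to open file '" + path + "'");
    }
    return std::string(
        std::istreambuf_iterator<char>(file),
        std::istreambuf_iterator<char>());
}
```
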
@@ -2254,6 +2291,13 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
             params.vocoder.model = value;
         }
     ).set_examples({LLAMA_EXAMPLE_TTS, LLAMA_EXAMPLE_SERVER}));
+    add_opt(common_arg(
+        {"--tts-use-guide-tokens"},
+        "Use guide tokens to improve TTS word recall",
+        [](common_params & params) {
+            params.vocoder.use_guide_tokens = true;
+        }
+    ).set_examples({LLAMA_EXAMPLE_TTS, LLAMA_EXAMPLE_SERVER}));

     // model-specific
     add_opt(common_arg(