server: add --chat-template-file

ochafik · ochafik · commit 41103c0ed621 · 2024-09-25T16:14:46.000+01:00
diff --git a/common/arg.cpp b/common/arg.cpp
@@ -1868,6 +1868,33 @@ gpt_params_context gpt_params_parser_init(gpt_params & params, llama_example ex,
             params.chat_template = value;
         }
     ).set_examples({LLAMA_EXAMPLE_MAIN, LLAMA_EXAMPLE_SERVER}).set_env("LLAMA_ARG_CHAT_TEMPLATE"));
+    // --chat-template-file: reads the chat template text from the given file path.
+    // It uses its own env var because the preceding option already registers LLAMA_ARG_CHAT_TEMPLATE;
+    // sharing it would make this handler try to open that variable's value as a file.
+    add_opt(llama_arg(
+        {"--chat-template-file"}, "JINJA_TEMPLATE_FILE",
+        "set custom jinja chat template file (default: template taken from model's metadata)\n"
+        "if suffix/prefix are specified, template will be disabled\n"
+        "only commonly used templates are accepted (unless --jinja is set before this flag):\n"
+        "https://github.com/ggerganov/llama.cpp/wiki/Templates-supported-by-llama_chat_apply_template",
+        [](gpt_params & params, const std::string & value) {
+            std::ifstream file(value);
+            if (!file) {
+                throw std::runtime_error(format("error: failed to open file '%s'\n", value.c_str()));
+            }
+            // Read the entire stream; braces avoid the most-vexing-parse of the parenthesized form.
+            const std::string chat_template{std::istreambuf_iterator<char>(file), std::istreambuf_iterator<char>()};
+            // Verification uses params.use_jinja as it stands when this handler runs.
+            if (!llama_chat_verify_template(chat_template, params.use_jinja)) {
+                throw std::runtime_error(format(
+                    "error: the supplied chat template is not supported: %s\n"
+                    "note: llama.cpp does not use jinja parser, we only support commonly used templates\n",
+                    chat_template.c_str()
+                ));
+            }
+            params.chat_template = chat_template;
+        }
+    ).set_examples({LLAMA_EXAMPLE_MAIN, LLAMA_EXAMPLE_SERVER}).set_env("LLAMA_ARG_CHAT_TEMPLATE_FILE"));
     add_opt(llama_arg(
         {"-sps", "--slot-prompt-similarity"}, "SIMILARITY",
         format("how much the prompt of a request must match the prompt of a slot in order to use that slot (default: %.2f, 0.0 = disabled)\n", params.slot_prompt_similarity),