Commit c5ac2b8

server : clean up built-in template detection
1 parent: 6e1531a

4 files changed: +25 additions, -27 deletions

common/common.cpp

Lines changed: 13 additions & 0 deletions
@@ -1614,6 +1614,19 @@ std::string common_detokenize(llama_context * ctx, const std::vector<llama_token
 // Chat template utils
 //
 
+std::string common_get_builtin_chat_template(const struct llama_model * model) {
+    static const char * template_key = "tokenizer.chat_template";
+    // call with NULL buffer to get the total size of the string
+    int32_t res = llama_model_meta_val_str(model, template_key, NULL, 0);
+    if (res < 2) {
+        return "";
+    } else {
+        std::vector<char> model_template(res + 1, 0);
+        llama_model_meta_val_str(model, template_key, model_template.data(), model_template.size());
+        return std::string(model_template.data(), model_template.size() - 1);
+    }
+}
+
 bool common_chat_verify_template(const std::string & tmpl) {
     llama_chat_message chat[] = {{"user", "test"}};
     int res = llama_chat_apply_template(nullptr, tmpl.c_str(), chat, 1, true, nullptr, 0);
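
The new helper follows the usual two-pass pattern for llama.cpp metadata lookups: call llama_model_meta_val_str with a NULL buffer to learn the value's length, then read the value into a buffer of that size. A rough usage sketch of the helper (not part of the commit; the model path and program structure are placeholders, and it assumes common.h and llama.h are available):

// Hypothetical caller: load a model and print its built-in chat template, if any.
#include "common.h"
#include "llama.h"

#include <cstdio>
#include <string>

int main() {
    llama_model_params mparams = llama_model_default_params();
    llama_model * model = llama_load_model_from_file("model.gguf", mparams); // placeholder path
    if (model == NULL) {
        fprintf(stderr, "failed to load model\n");
        return 1;
    }

    const std::string tmpl = common_get_builtin_chat_template(model);
    if (tmpl.empty()) {
        printf("model has no built-in chat template\n");
    } else {
        printf("built-in chat template (%zu bytes):\n%s\n", tmpl.size(), tmpl.c_str());
    }

    llama_free_model(model);
    return 0;
}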

common/common.h

Lines changed: 3 additions & 0 deletions
@@ -571,6 +571,9 @@ struct common_chat_msg {
     std::string content;
 };
 
+// Get the built-in chat template for the model. Return empty string if not present.
+std::string common_get_builtin_chat_template(const struct llama_model * model);
+
 // Check if the template supplied via "--chat-template" is supported or not. Returns true if it's valid
 bool common_chat_verify_template(const std::string & tmpl);

examples/server/server.cpp

Lines changed: 9 additions & 14 deletions
@@ -1623,17 +1623,10 @@ struct server_context {
         return true;
     }
 
-    bool validate_model_chat_template() const {
-        std::vector<char> model_template(2048, 0); // longest known template is about 1200 bytes
-        std::string template_key = "tokenizer.chat_template";
-        int32_t res = llama_model_meta_val_str(model, template_key.c_str(), model_template.data(), model_template.size());
-        if (res >= 0) {
-            llama_chat_message chat[] = {{"user", "test"}};
-            std::string tmpl = std::string(model_template.data(), model_template.size());
-            int32_t chat_res = llama_chat_apply_template(model, tmpl.c_str(), chat, 1, true, nullptr, 0);
-            return chat_res > 0;
-        }
-        return false;
+    bool validate_builtin_chat_template() const {
+        llama_chat_message chat[] = {{"user", "test"}};
+        int32_t chat_res = llama_chat_apply_template(model, nullptr, chat, 1, true, nullptr, 0);
+        return chat_res > 0;
     }
 
     void init() {
@@ -3484,7 +3477,7 @@ int main(int argc, char ** argv) {
             { "default_generation_settings", ctx_server.default_generation_settings_for_props },
             { "total_slots",                 ctx_server.params_base.n_parallel },
             { "model_path",                  ctx_server.params_base.model },
-            { "chat_template",               llama_get_chat_template(ctx_server.model) },
+            { "chat_template",               common_get_builtin_chat_template(ctx_server.model) },
             { "build_info",                  build_info },
         };

@@ -4113,14 +4106,16 @@ int main(int argc, char ** argv) {
 
     // if a custom chat template is not supplied, we will use the one that comes with the model (if any)
     if (params.chat_template.empty()) {
-        if (!ctx_server.validate_model_chat_template()) {
+        if (!ctx_server.validate_builtin_chat_template()) {
             LOG_WRN("%s: The chat template that comes with this model is not yet supported, falling back to chatml. This may cause the model to output suboptimal responses\n", __func__);
             params.chat_template = "chatml";
         }
     }
 
     // print sample chat example to make it clear which template is used
-    LOG_INF("%s: chat template, built_in: %d, chat_example: '%s'\n", __func__, params.chat_template.empty(), common_chat_format_example(ctx_server.model, params.chat_template).c_str());
+    LOG_INF("%s: chat template, chat_template: %s, example_format: '%s'\n", __func__,
+        params.chat_template.empty() ? "(built-in)" : params.chat_template.c_str(),
+        common_chat_format_example(ctx_server.model, params.chat_template).c_str());
 
     ctx_server.queue_tasks.on_new_task(std::bind(
         &server_context::process_single_task, &ctx_server, std::placeholders::_1));
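
The simplified check leans on a documented behavior of llama_chat_apply_template: when tmpl is nullptr, the model's own tokenizer.chat_template metadata is used, and a negative value is returned if that template is not recognized. The probe in validate_builtin_chat_template passes a zero-size buffer; for illustration, a hedged sketch of a full call using the same built-in fallback might look like the following (the helper name, messages, and buffer size are illustrative, not part of the commit, and it assumes llama.h, <string>, and <vector> are included):

// Sketch: format a short conversation using the model's built-in template (tmpl == nullptr).
static std::string apply_builtin_template_example(const llama_model * model) {
    llama_chat_message chat[] = {
        {"user",      "Hello"},
        {"assistant", "Hi there"},
        {"user",      "How are you?"},
    };
    const size_t n_msg = sizeof(chat) / sizeof(chat[0]);

    std::vector<char> buf(1024);
    int32_t res = llama_chat_apply_template(model, nullptr, chat, n_msg, true, buf.data(), buf.size());
    if (res < 0) {
        return ""; // built-in template missing or unsupported
    }
    if ((size_t) res > buf.size()) {
        // returned length exceeds the buffer: grow it and apply again
        buf.resize(res);
        res = llama_chat_apply_template(model, nullptr, chat, n_msg, true, buf.data(), buf.size());
    }
    return std::string(buf.data(), res);
}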

examples/server/utils.hpp

Lines changed: 0 additions & 13 deletions
@@ -382,19 +382,6 @@ inline std::string format_chat(const struct llama_model * model, const std::stri
     return formatted_chat;
 }
 
-static std::string llama_get_chat_template(const struct llama_model * model) {
-    std::string template_key = "tokenizer.chat_template";
-    // call with NULL buffer to get the total size of the string
-    int32_t res = llama_model_meta_val_str(model, template_key.c_str(), NULL, 0);
-    if (res < 2) {
-        return "";
-    } else {
-        std::vector<char> model_template(res + 1, 0);
-        llama_model_meta_val_str(model, template_key.c_str(), model_template.data(), model_template.size());
-        return std::string(model_template.data(), model_template.size() - 1);
-    }
-}
-
 //
 // base64 utils (TODO: move to common in the future)
 //
