Commit e63520f
Author: ochafik
Parent: ee1e10e

    Forward decl minja::chat_template to avoid eager json dep

File tree: 5 files changed, +40 −21 lines

common/common.cpp
common/common.h
examples/main/main.cpp
examples/run/run.cpp
examples/server/server.cpp
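
What the commit does, in one pattern: common/common.h had been including chat-template.hpp, which in turn pulls in json.hpp for every translation unit that includes common.h. Forward-declaring minja::chat_template and holding the templates behind std::unique_ptr lets the header drop the heavy include; only the sources that actually construct or apply a template pay for it. A minimal, self-contained sketch of the idea (all names are hypothetical stand-ins, not the repo's code):

```cpp
#include <memory>

namespace heavy { class parser; }  // forward declaration: no heavy header needed

// Header-friendly holder: a unique_ptr member may point at an incomplete
// type, as long as the type is complete wherever it is actually destroyed.
struct holder {
    holder();
    ~holder();  // declared here, defined below where heavy::parser is complete
    std::unique_ptr<heavy::parser> p;
};

// ---- normally in the .cpp file, after including the heavy header ----
namespace heavy { class parser { /* stand-in for json.hpp-sized machinery */ }; }

holder::holder() : p(std::make_unique<heavy::parser>()) {}
holder::~holder() = default;  // heavy::parser is complete here

int main() { holder h; }
```

The commit uses a slight variant: llama_chat_templates keeps its implicitly-defined special members, so each consuming .cpp includes chat-template.hpp itself, as the per-file diffs below show.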

common/common.cpp

Lines changed: 15 additions & 5 deletions

```diff
@@ -12,6 +12,7 @@
 #include "json.hpp"
 #include "json-schema-to-grammar.h"
 #include "llama.h"
+#include "chat-template.hpp"
 
 #include <algorithm>
 #include <cinttypes>
@@ -1827,11 +1828,18 @@ llama_chat_templates llama_chat_templates_from_model(const struct llama_model *
     auto eos_token = common_token_to_piece(vocab, llama_vocab_eos(vocab), true);
     std::string default_template_src = chat_template_override;
     std::string tool_use_template_src = chat_template_override;
+    bool has_explicit_template = !chat_template_override.empty();
     if (chat_template_override.empty()) {
         auto str = llama_model_chat_template(model, /* name */ nullptr);
-        if (str) default_template_src = str;
+        if (str) {
+            default_template_src = str;
+            has_explicit_template = true;
+        }
         str = llama_model_chat_template(model, /* name */ "tool_use");
-        if (str) tool_use_template_src = str;
+        if (str) {
+            tool_use_template_src = str;
+            has_explicit_template = true;
+        }
     }
     if (default_template_src.empty() || default_template_src == "chatml") {
         if (!tool_use_template_src.empty()) {
@@ -1848,9 +1856,11 @@ llama_chat_templates llama_chat_templates_from_model(const struct llama_model *
         }
     }
     return {
-        /* .default_template = */ { default_template_src, bos_token, eos_token },
-        /* .tool_use_template = */ tool_use_template_src.empty() ? std::nullopt
-            : std::optional<minja::chat_template>({ tool_use_template_src, bos_token, eos_token }),
+        has_explicit_template,
+        std::move(std::make_unique<minja::chat_template>(default_template_src, bos_token, eos_token)),
+        tool_use_template_src.empty()
+            ? nullptr
+            : std::move(std::make_unique<minja::chat_template>(tool_use_template_src, bos_token, eos_token))
     };
 }
 
```
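
The factory's return shape changes from values to owning pointers: default_template is always populated (falling back to chatml), while tool_use_template is now nullptr when absent rather than std::nullopt. A compilable sketch of the consuming pattern the example diffs below follow (stub types and hypothetical names, not the real API):

```cpp
#include <cassert>
#include <memory>

// Stand-in for the real template type; for illustration only.
struct chat_template_stub {
    const char * source() const { return "chatml"; }
};

struct templates_result {
    bool has_explicit_template;
    std::unique_ptr<chat_template_stub> default_template;   // always set
    std::unique_ptr<chat_template_stub> tool_use_template;  // may be null
};

// Mirrors how the example programs below consume the struct: assert the
// always-set pointer, then fall back from the optional tool-use template.
void use(const templates_result & t, bool wants_tools) {
    assert(t.default_template);  // the diffs use GGML_ASSERT here
    const chat_template_stub & tmpl =
        wants_tools && t.tool_use_template ? *t.tool_use_template
                                           : *t.default_template;
    (void) tmpl.source();
}

int main() {
    templates_result t{false, std::make_unique<chat_template_stub>(), nullptr};
    use(t, /* wants_tools = */ true);  // falls back to the default template
}
```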

common/common.h

Lines changed: 10 additions & 6 deletions

```diff
@@ -3,7 +3,6 @@
 #pragma once
 
 #include "llama-cpp.h"
-#include "chat-template.hpp"
 
 #include <optional>
 #include <string>
@@ -601,8 +600,18 @@ struct common_chat_msg {
 // Check if the template supplied via "--chat-template" is supported or not. Returns true if it's valid
 bool common_chat_verify_template(const std::string & tmpl, bool use_jinja);
 
+namespace minja {
+    class chat_template;
+}
+
 typedef minja::chat_template llama_chat_template;
 
+struct llama_chat_templates {
+    bool has_explicit_template; // Model had builtin template or template override was specified.
+    std::unique_ptr<llama_chat_template> default_template; // always set (defaults to chatml)
+    std::unique_ptr<llama_chat_template> tool_use_template;
+};
+
 // CPP wrapper for llama_chat_apply_template
 // If the built-in template is not supported, we default to chatml
 // If the custom "tmpl" is not supported, we throw an error
@@ -624,11 +633,6 @@ std::string common_chat_format_single(
 std::string common_chat_format_example(
     const llama_chat_template & tmpl, bool use_jinja);
 
-struct llama_chat_templates {
-    llama_chat_template default_template;
-    std::optional<llama_chat_template> tool_use_template;
-};
-
 llama_chat_templates llama_chat_templates_from_model(const struct llama_model * model, const std::string & chat_template_override);
 
 //
```
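
A subtlety worth noting about the new header: because llama_chat_templates relies on its implicitly-defined destructor, any translation unit that destroys one must see the complete minja::chat_template, which is why the example programs below gain a direct #include "chat-template.hpp". A sketch of the failure mode (stand-in names, not the repo's code):

```cpp
#include <memory>

namespace minja_like { class chat_template; }  // forward declaration only

struct templates_like {
    std::unique_ptr<minja_like::chat_template> default_template;
};  // implicit destructor: only instantiated in TUs that destroy the struct

void take(templates_like *) {}  // fine: nothing is destroyed here

// void make() { templates_like t; }  // error in a TU without the full class
// definition: destroying unique_ptr<T> static_asserts that T is complete

int main() {}
```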

examples/main/main.cpp

Lines changed: 4 additions & 3 deletions

```diff
@@ -4,6 +4,7 @@
 #include "log.h"
 #include "sampling.h"
 #include "llama.h"
+#include "chat-template.hpp"
 
 #include <cstdio>
 #include <cstring>
@@ -200,7 +201,7 @@ int main(int argc, char ** argv) {
     }
 
     // auto enable conversation mode if chat template is available
-    const bool has_chat_template = !chat_templates.default_template.source().empty();
+    const bool has_chat_template = chat_templates.has_explicit_template && chat_templates.default_template;
     if (params.conversation_mode == COMMON_CONVERSATION_MODE_AUTO) {
         if (has_chat_template) {
             LOG_INF("%s: chat template is available, enabling conversation mode (disable it with -no-cnv)\n", __func__);
@@ -218,7 +219,7 @@ int main(int argc, char ** argv) {
     // print chat template example in conversation mode
     if (params.conversation_mode) {
         if (params.enable_chat_template) {
-            LOG_INF("%s: chat template example:\n%s\n", __func__, common_chat_format_example(chat_templates.default_template, params.use_jinja).c_str());
+            LOG_INF("%s: chat template example:\n%s\n", __func__, common_chat_format_example(*chat_templates.default_template, params.use_jinja).c_str());
         } else {
             LOG_INF("%s: in-suffix/prefix is specified, chat template will be disabled\n", __func__);
         }
@@ -264,7 +265,7 @@ int main(int argc, char ** argv) {
 
     auto chat_add_and_format = [&chat_msgs, &chat_templates](const std::string & role, const std::string & content) {
        common_chat_msg new_msg{role, content};
-       auto formatted = common_chat_format_single(chat_templates.default_template, chat_msgs, new_msg, role == "user", g_params->use_jinja);
+       auto formatted = common_chat_format_single(*chat_templates.default_template, chat_msgs, new_msg, role == "user", g_params->use_jinja);
        chat_msgs.push_back({role, content});
        LOG_DBG("formatted: '%s'\n", formatted.c_str());
        return formatted;
```

examples/run/run.cpp

Lines changed: 4 additions & 2 deletions

```diff
@@ -26,6 +26,7 @@
 #include "common.h"
 #include "json.hpp"
 #include "llama-cpp.h"
+#include "chat-template.hpp"
 
 #if defined(__unix__) || (defined(__APPLE__) && defined(__MACH__)) || defined(_WIN32)
 [[noreturn]] static void sigint_handler(int) {
@@ -936,6 +937,7 @@ static int chat_loop(LlamaData & llama_data, const std::string & user, bool use_
     int prev_len = 0;
     llama_data.fmtted.resize(llama_n_ctx(llama_data.context.get()));
     auto chat_templates = llama_chat_templates_from_model(llama_data.model.get(), "");
+    GGML_ASSERT(chat_templates.default_template);
     static const bool stdout_a_terminal = is_stdout_a_terminal();
     while (true) {
         // Get user input
@@ -946,7 +948,7 @@ static int chat_loop(LlamaData & llama_data, const std::string & user, bool use_
 
         add_message("user", user.empty() ? user_input : user, llama_data);
         int new_len;
-        if (apply_chat_template_with_error_handling(chat_templates.default_template, llama_data, true, new_len, use_jinja) < 0) {
+        if (apply_chat_template_with_error_handling(*chat_templates.default_template, llama_data, true, new_len, use_jinja) < 0) {
             return 1;
         }
@@ -961,7 +963,7 @@ static int chat_loop(LlamaData & llama_data, const std::string & user, bool use_
         }
 
         add_message("assistant", response, llama_data);
-        if (apply_chat_template_with_error_handling(chat_templates.default_template, llama_data, false, prev_len, use_jinja) < 0) {
+        if (apply_chat_template_with_error_handling(*chat_templates.default_template, llama_data, false, prev_len, use_jinja) < 0) {
             return 1;
         }
     }
```

examples/server/server.cpp

Lines changed: 7 additions & 5 deletions

```diff
@@ -1745,8 +1745,9 @@ struct server_context {
 
         if (use_jinja) {
             auto templates = llama_chat_templates_from_model(model, "");
+            GGML_ASSERT(templates.default_template);
             try {
-                templates.default_template.apply({{
+                templates.default_template->apply({{
                     {"role", "user"},
                     {"content", "test"},
                 }}, json(), true);
@@ -3630,6 +3631,7 @@ int main(int argc, char ** argv) {
         std::lock_guard<std::mutex> lock(chat_templates_mutex);
         if (!chat_templates) {
             chat_templates = llama_chat_templates_from_model(ctx_server.model, ctx_server.params_base.chat_template);
+            GGML_ASSERT(chat_templates->default_template);
         }
         return *chat_templates;
     };
@@ -3641,7 +3643,7 @@ int main(int argc, char ** argv) {
             { "default_generation_settings", ctx_server.default_generation_settings_for_props },
             { "total_slots",                 ctx_server.params_base.n_parallel },
             { "model_path",                  ctx_server.params_base.model },
-            { "chat_template",               templates.default_template.source() },
+            { "chat_template",               templates.default_template->source() },
             { "build_info",                  build_info },
         };
         if (ctx_server.params_base.use_jinja && templates.tool_use_template) {
@@ -3868,7 +3870,7 @@ int main(int argc, char ** argv) {
 
         auto body = json::parse(req.body);
         const auto & templates = get_chat_templates();
-        const auto & chat_template = body.contains("tools") && templates.tool_use_template ? *templates.tool_use_template : templates.default_template;
+        const auto & chat_template = body.contains("tools") && templates.tool_use_template ? *templates.tool_use_template : *templates.default_template;
         json data = oaicompat_completion_params_parse(body, chat_template, params.use_jinja);
 
         return handle_completions_impl(
@@ -4287,8 +4289,8 @@ int main(int argc, char ** argv) {
 
     // print sample chat example to make it clear which template is used
     LOG_INF("%s: chat template, chat_template: %s, example_format: '%s'\n", __func__,
-        get_chat_templates().default_template.source().c_str(),
-        common_chat_format_example(get_chat_templates().default_template, ctx_server.params_base.use_jinja).c_str());
+        get_chat_templates().default_template->source().c_str(),
+        common_chat_format_example(*get_chat_templates().default_template, ctx_server.params_base.use_jinja).c_str());
 
     ctx_server.queue_tasks.on_new_task(std::bind(
         &server_context::process_single_task, &ctx_server, std::placeholders::_1));
```
