Skip to content

Commit c9e8fdd

Browse files
ochafik committed
Move chat_templates inside server_context + remove mutex
1 parent: db9dd0c; commit: c9e8fdd

File tree

1 file changed

+12 -22 lines changed

1 file changed

+12 -22 lines changed

examples/server/server.cpp

Lines changed: 12 additions & 22 deletions
Original file line number | Diff line number | Diff line change
@@ -1662,6 +1662,8 @@ struct server_context {
16621662
// Necessary similarity of prompt for slot selection
16631663
float slot_prompt_similarity = 0.0f;
16641664

1665+
llama_chat_templates chat_templates;
1666+
16651667
~server_context() {
16661668
// Clear any sampling context
16671669
for (server_slot & slot : slots) {
@@ -1738,6 +1740,8 @@ struct server_context {
17381740
cparams_dft.type_v = GGML_TYPE_F16;
17391741
}
17401742

1743+
chat_templates = common_chat_templates_from_model(model, params_base.chat_template);
1744+
17411745
return true;
17421746
}
17431747

@@ -3625,30 +3629,17 @@ int main(int argc, char ** argv) {
36253629
}
36263630
};
36273631

3628-
std::mutex chat_templates_mutex;
3629-
std::optional<llama_chat_templates> chat_templates;
3630-
3631-
auto get_chat_templates = [&ctx_server, &chat_templates_mutex, &chat_templates]() -> const llama_chat_templates & {
3632-
std::lock_guard<std::mutex> lock(chat_templates_mutex);
3633-
if (!chat_templates) {
3634-
chat_templates = common_chat_templates_from_model(ctx_server.model, ctx_server.params_base.chat_template);
3635-
GGML_ASSERT(chat_templates->template_default);
3636-
}
3637-
return *chat_templates;
3638-
};
3639-
3640-
const auto handle_props = [&ctx_server, &res_ok, &get_chat_templates](const httplib::Request &, httplib::Response & res) {
3632+
const auto handle_props = [&ctx_server, &res_ok](const httplib::Request &, httplib::Response & res) {
36413633
// this endpoint is publicly available, please only return what is safe to be exposed
3642-
const auto & templates = get_chat_templates();
36433634
json data = {
36443635
{ "default_generation_settings", ctx_server.default_generation_settings_for_props },
36453636
{ "total_slots", ctx_server.params_base.n_parallel },
36463637
{ "model_path", ctx_server.params_base.model },
3647-
{ "chat_template", templates.template_default->source() },
3638+
{ "chat_template", ctx_server.chat_templates.template_default->source() },
36483639
{ "build_info", build_info },
36493640
};
3650-
if (ctx_server.params_base.use_jinja && templates.template_tool_use) {
3651-
data["chat_template_tool_use"] = templates.template_tool_use->source();
3641+
if (ctx_server.params_base.use_jinja && ctx_server.chat_templates.template_tool_use) {
3642+
data["chat_template_tool_use"] = ctx_server.chat_templates.template_tool_use->source();
36523643
}
36533644

36543645
res_ok(res, data);
@@ -3863,15 +3854,14 @@ int main(int argc, char ** argv) {
38633854
OAICOMPAT_TYPE_NONE); // infill is not OAI compatible
38643855
};
38653856

3866-
const auto handle_chat_completions = [&ctx_server, &params, &res_error, &handle_completions_impl, &get_chat_templates](const httplib::Request & req, httplib::Response & res) {
3857+
const auto handle_chat_completions = [&ctx_server, &params, &res_error, &handle_completions_impl](const httplib::Request & req, httplib::Response & res) {
38673858
if (ctx_server.params_base.embedding) {
38683859
res_error(res, format_error_response("This server does not support completions. Start it without `--embeddings`", ERROR_TYPE_NOT_SUPPORTED));
38693860
return;
38703861
}
38713862

38723863
auto body = json::parse(req.body);
3873-
const auto & templates = get_chat_templates();
3874-
const auto & chat_template = body.contains("tools") && templates.template_tool_use ? *templates.template_tool_use : *templates.template_default;
3864+
const auto & chat_template = body.contains("tools") && ctx_server.chat_templates.template_tool_use ? *ctx_server.chat_templates.template_tool_use : *ctx_server.chat_templates.template_default;
38753865
json data = oaicompat_completion_params_parse(body, chat_template, params.use_jinja);
38763866

38773867
return handle_completions_impl(
@@ -4290,8 +4280,8 @@ int main(int argc, char ** argv) {
42904280

42914281
// print sample chat example to make it clear which template is used
42924282
LOG_INF("%s: chat template, chat_template: %s, example_format: '%s'\n", __func__,
4293-
get_chat_templates().template_default->source().c_str(),
4294-
common_chat_format_example(*get_chat_templates().template_default, ctx_server.params_base.use_jinja).c_str());
4283+
ctx_server.chat_templates.template_default->source().c_str(),
4284+
common_chat_format_example(*ctx_server.chat_templates.template_default, ctx_server.params_base.use_jinja).c_str());
42954285

42964286
ctx_server.queue_tasks.on_new_task(std::bind(
42974287
&server_context::process_single_task, &ctx_server, std::placeholders::_1));

0 commit comments

Comments
 (0)