@@ -1662,6 +1662,8 @@ struct server_context {
16621662 // Necessary similarity of prompt for slot selection
16631663 float slot_prompt_similarity = 0 .0f ;
16641664
1665+ llama_chat_templates chat_templates;
1666+
16651667 ~server_context () {
16661668 // Clear any sampling context
16671669 for (server_slot & slot : slots) {
@@ -1738,6 +1740,8 @@ struct server_context {
17381740 cparams_dft.type_v = GGML_TYPE_F16;
17391741 }
17401742
1743+ chat_templates = common_chat_templates_from_model (model, params_base.chat_template );
1744+
17411745 return true ;
17421746 }
17431747
@@ -3625,30 +3629,17 @@ int main(int argc, char ** argv) {
36253629 }
36263630 };
36273631
3628- std::mutex chat_templates_mutex;
3629- std::optional<llama_chat_templates> chat_templates;
3630-
3631- auto get_chat_templates = [&ctx_server, &chat_templates_mutex, &chat_templates]() -> const llama_chat_templates & {
3632- std::lock_guard<std::mutex> lock (chat_templates_mutex);
3633- if (!chat_templates) {
3634- chat_templates = common_chat_templates_from_model (ctx_server.model , ctx_server.params_base .chat_template );
3635- GGML_ASSERT (chat_templates->template_default );
3636- }
3637- return *chat_templates;
3638- };
3639-
3640- const auto handle_props = [&ctx_server, &res_ok, &get_chat_templates](const httplib::Request &, httplib::Response & res) {
3632+ const auto handle_props = [&ctx_server, &res_ok](const httplib::Request &, httplib::Response & res) {
36413633 // this endpoint is publicly available, please only return what is safe to be exposed
3642- const auto & templates = get_chat_templates ();
36433634 json data = {
36443635 { " default_generation_settings" , ctx_server.default_generation_settings_for_props },
36453636 { " total_slots" , ctx_server.params_base .n_parallel },
36463637 { " model_path" , ctx_server.params_base .model },
3647- { " chat_template" , templates .template_default ->source () },
3638+ { " chat_template" , ctx_server. chat_templates .template_default ->source () },
36483639 { " build_info" , build_info },
36493640 };
3650- if (ctx_server.params_base .use_jinja && templates .template_tool_use ) {
3651- data[" chat_template_tool_use" ] = templates .template_tool_use ->source ();
3641+ if (ctx_server.params_base .use_jinja && ctx_server. chat_templates .template_tool_use ) {
3642+ data[" chat_template_tool_use" ] = ctx_server. chat_templates .template_tool_use ->source ();
36523643 }
36533644
36543645 res_ok (res, data);
@@ -3863,15 +3854,14 @@ int main(int argc, char ** argv) {
38633854 OAICOMPAT_TYPE_NONE); // infill is not OAI compatible
38643855 };
38653856
3866- const auto handle_chat_completions = [&ctx_server, &params, &res_error, &handle_completions_impl, &get_chat_templates ](const httplib::Request & req, httplib::Response & res) {
3857+ const auto handle_chat_completions = [&ctx_server, &params, &res_error, &handle_completions_impl](const httplib::Request & req, httplib::Response & res) {
38673858 if (ctx_server.params_base .embedding ) {
38683859 res_error (res, format_error_response (" This server does not support completions. Start it without `--embeddings`" , ERROR_TYPE_NOT_SUPPORTED));
38693860 return ;
38703861 }
38713862
38723863 auto body = json::parse (req.body );
3873- const auto & templates = get_chat_templates ();
3874- const auto & chat_template = body.contains (" tools" ) && templates.template_tool_use ? *templates.template_tool_use : *templates.template_default ;
3864+ const auto & chat_template = body.contains (" tools" ) && ctx_server.chat_templates .template_tool_use ? *ctx_server.chat_templates .template_tool_use : *ctx_server.chat_templates .template_default ;
38753865 json data = oaicompat_completion_params_parse (body, chat_template, params.use_jinja );
38763866
38773867 return handle_completions_impl (
@@ -4290,8 +4280,8 @@ int main(int argc, char ** argv) {
42904280
42914281 // print sample chat example to make it clear which template is used
42924282 LOG_INF (" %s: chat template, chat_template: %s, example_format: '%s'\n " , __func__,
4293- get_chat_templates () .template_default ->source ().c_str (),
4294- common_chat_format_example (*get_chat_templates () .template_default , ctx_server.params_base .use_jinja ).c_str ());
4283+ ctx_server. chat_templates .template_default ->source ().c_str (),
4284+ common_chat_format_example (*ctx_server. chat_templates .template_default , ctx_server.params_base .use_jinja ).c_str ());
42954285
42964286 ctx_server.queue_tasks .on_new_task (std::bind (
42974287 &server_context::process_single_task, &ctx_server, std::placeholders::_1));
0 commit comments