Skip to content

Commit d92f518

Browse files
committed
Simplify logic even further
* if no `chat_template` is passed, we can rely on the `common_chat_apply_template` function
1 parent b2cf6e7 commit d92f518

File tree

2 files changed

+14
-17
lines changed

2 files changed

+14
-17
lines changed

examples/server/server.cpp

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3226,7 +3226,6 @@ int main(int argc, char ** argv) {
32263226
LOG_WRN("%s: Prefix and suffix will be used for a custom chat template. This may cause the model to output suboptimal responses\n", __func__);
32273227
} else if (!ctx_server.validate_model_chat_template()) {
32283228
LOG_WRN("%s: The chat template that comes with this model is not yet supported, falling back to chatml. This may cause the model to output suboptimal responses\n", __func__);
3229-
params.chat_template = "chatml";
32303229
}
32313230
} else if (!params.input_prefix.empty() || !params.input_suffix.empty()) {
32323231
LOG_WRN("%s: Prefix and suffix are defined, but will not be used because a chat template '%s' is chosen.\n", __func__, params.chat_template.c_str());

examples/server/utils.hpp

Lines changed: 14 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -304,7 +304,7 @@ inline std::string format_chat(const struct llama_model * model, const std::stri
304304
std::vector<common_chat_msg> chat;
305305
std::string formatted_chat;
306306

307-
bool is_custom = !prefix.empty() || !suffix.empty();
307+
bool is_custom = tmpl.empty() && (!prefix.empty() || !suffix.empty());
308308

309309
for (size_t i = 0; i < messages.size(); ++i) {
310310
const auto & curr_msg = messages[i];
@@ -337,7 +337,13 @@ inline std::string format_chat(const struct llama_model * model, const std::stri
337337
}
338338
}
339339

340-
if (!is_custom) formatted_chat = common_chat_apply_template(model, tmpl, chat, true);
340+
if (!is_custom) {
341+
LOG_WRN("Using '%s' template, prefix and suffix are ignored.\n", tmpl.c_str());
342+
formatted_chat = common_chat_apply_template(model, tmpl, chat, true);
343+
} else {
344+
LOG_WRN("Used prefix '%s' and suffix '%s'.\n", prefix.c_str(), suffix.c_str());
345+
}
346+
341347
LOG_DBG("formatted_chat using '%s': '%s'\n", tmpl.c_str(), formatted_chat.c_str());
342348

343349
return formatted_chat;
@@ -353,7 +359,7 @@ inline std::string format_chat_example(const struct llama_model * model, const s
353359

354360
std::string formatted_example;
355361

356-
if (!prefix.empty() || !suffix.empty()) {
362+
if (tmpl.empty() && (!prefix.empty() || !suffix.empty())) {
357363
for (auto message : msgs) {
358364
if (message.role == "user") formatted_example += prefix + message.content + suffix;
359365
else formatted_example += message.content;
@@ -640,20 +646,12 @@ static json oaicompat_completion_params_parse(
640646
std::string prefix = (body.contains("input_prefix") ? body.at("input_prefix").get<std::string>() : "");
641647
std::string suffix = (body.contains("input_suffix") ? body.at("input_suffix").get<std::string>() : "");
642648

643-
// if template is sent in data, ignore prefix and suffix
644-
if (!chat_tmpl.empty()) {
645-
LOG_WRN("\nUsing '%s' template, prefix and suffix are ignored.\n", chat_tmpl.c_str());
646-
prefix = "";
647-
suffix = "";
648-
} else {
649-
if (prefix.empty()) {
650-
prefix = input_prefix;
651-
}
649+
if (prefix.empty()) {
650+
prefix = input_prefix;
651+
}
652652

653-
if (suffix.empty()) {
654-
suffix = input_suffix;
655-
}
656-
LOG_WRN("\nUsing prefix '%s' and suffix '%s'.\n", prefix.c_str(), suffix.c_str());
653+
if (suffix.empty()) {
654+
suffix = input_suffix;
657655
}
658656

659657
llama_params["prompt"] = format_chat(model, chat_tmpl, prefix, suffix, body.at("messages"));

0 commit comments

Comments
 (0)