@@ -300,8 +300,19 @@ static llama_tokens format_infill(
 }
 
 // Format given chat. If tmpl is empty, we take the template from model metadata
+// If messages[i]["prefix"] or messages[i]["suffix"] is present, we format the chat with custom prefix/suffix
 inline std::string format_chat(const struct llama_model * model, const std::string & tmpl, const std::vector<json> & messages) {
     std::vector<common_chat_msg> chat;
+    bool is_custom = false;
+    std::ostringstream oss;
+
+    // if at least one message has custom prefix/suffix, we switch to custom formatting
+    for (const auto & msg : messages) {
+        if (msg.contains("prefix") || msg.contains("suffix")) {
+            is_custom = true;
+            break;
+        }
+    }
 
     for (size_t i = 0; i < messages.size(); ++i) {
         const auto & curr_msg = messages[i];
@@ -325,10 +336,18 @@ inline std::string format_chat(const struct llama_model * model, const std::stri
             throw std::runtime_error("Missing 'content' (ref: https://github.com/ggerganov/llama.cpp/issues/8367)");
         }
 
-        chat.push_back({role, content});
+        if (is_custom) {
+            std::string prefix = json_value(curr_msg, "prefix", std::string(""));
+            std::string suffix = json_value(curr_msg, "suffix", std::string(""));
+            oss << prefix << content << suffix;
+        } else {
+            chat.push_back({role, content});
+        }
     }
 
-    const auto formatted_chat = common_chat_apply_template(model, tmpl, chat, true);
+    const auto formatted_chat = is_custom
+        ? oss.str()
+        : common_chat_apply_template(model, tmpl, chat, true);
     LOG_DBG("formatted_chat: '%s'\n", formatted_chat.c_str());
 
     return formatted_chat;
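
For reference, here is a minimal standalone sketch (an assumption, not part of the patch) of how the custom path above concatenates prefix + content + suffix per message. It uses nlohmann::json's contains()/value() directly in place of the server's json_value() helper, and the example messages with the <|user|>/<|end|> markers are invented for illustration.

#include <iostream>
#include <sstream>
#include <string>
#include <nlohmann/json.hpp>

using json = nlohmann::json;

int main() {
    // Hypothetical request body: the second message carries custom markers.
    const json messages = json::parse(R"([
        { "role": "system", "content": "You are a helpful assistant." },
        { "role": "user", "content": "Hello",
          "prefix": "<|user|>\n", "suffix": "<|end|>\n" }
    ])");

    // Same trigger as the patch: any message with "prefix" or "suffix"
    // switches the whole request to custom formatting.
    bool is_custom = false;
    for (const auto & msg : messages) {
        if (msg.contains("prefix") || msg.contains("suffix")) {
            is_custom = true;
            break;
        }
    }

    std::ostringstream oss;
    for (const auto & msg : messages) {
        if (is_custom) {
            // Missing keys default to empty strings, so messages without
            // prefix/suffix contribute only their content.
            oss << msg.value("prefix", "")
                << msg.at("content").get<std::string>()
                << msg.value("suffix", "");
        }
    }

    // Prints the raw prompt; the model's chat template is bypassed entirely.
    std::cout << oss.str() << std::endl;
    return 0;
}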