@@ -21862,41 +21862,91 @@ static int32_t llama_chat_apply_template_internal(
         if (add_ass) {
             ss << "<|im_start|>assistant\n";
         }
-    } else if (tmpl == "llama2" || tmpl == "mistral" || tmpl_contains("[INST]")) {
-        // llama2 template and its variants
-        // [variant] support system message
-        bool support_system_message = tmpl_contains("<<SYS>>") || tmpl == "mistral";
-        // [variant] space before + after response
-        bool space_around_response = tmpl_contains("' ' + eos_token");
-        // [variant] add BOS inside history
-        bool add_bos_inside_history = tmpl_contains("bos_token + '[INST]");
-        // [variant] trim spaces from the input message
-        bool strip_message = tmpl_contains("content.strip()");
-        // construct the prompt
-        bool is_inside_turn = true; // skip BOS at the beginning
-        ss << "[INST] ";
-        for (auto message : chat) {
-            std::string content = strip_message ? trim(message->content) : message->content;
-            std::string role(message->role);
-            if (!is_inside_turn) {
-                is_inside_turn = true;
-                ss << (add_bos_inside_history ? "<s>[INST] " : "[INST] ");
+    } else if (tmpl == "llama2" || tmpl.find("mistral") == 0 || tmpl_contains("[INST]")) {
+        if (tmpl == "mistral-v7" || tmpl_contains("[SYSTEM_PROMPT]")) {
+            // Official mistral 'v7' template
+            // See: https://huggingface.co/mistralai/Mistral-Large-Instruct-2411#basic-instruct-template-v7
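+            // e.g. a system + user + assistant chat renders as:
+            //   "[SYSTEM_PROMPT] sys[/SYSTEM_PROMPT][INST] user[/INST] reply</s>"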
+            for (auto message : chat) {
+                std::string role(message->role);
+                std::string content(message->content);
+                if (role == "system") {
+                    ss << "[SYSTEM_PROMPT] " << content << "[/SYSTEM_PROMPT]";
+                } else if (role == "user") {
+                    ss << "[INST] " << content << "[/INST]";
+                } else {
+                    ss << " " << content << "</s>";
+                }
             }
-            if (role == "system") {
-                if (support_system_message) {
-                    ss << "<<SYS>>\n" << content << "\n<</SYS>>\n\n";
+        } else if (tmpl == "mistral-v1" || tmpl == "mistral-v3" || tmpl == "mistral-v3-tekken"
+                || tmpl_contains("' [INST] ' + system_message") // catches official 'v1' template
+                || tmpl_contains("[AVAILABLE_TOOLS]")) { // catches official 'v3' and 'v3-tekken' templates
+            // Official mistral 'v1', 'v3' and 'v3-tekken' templates
+            // See: https://github.com/mistralai/cookbook/blob/main/concept-deep-dive/tokenization/chat_templates.md
+            // See: https://github.com/mistralai/cookbook/blob/main/concept-deep-dive/tokenization/templates.md
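+            // spacing differs per variant: v1 " [INST] ... [/INST]", v3 "[INST] ...[/INST]", v3-tekken "[INST]...[/INST]"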
+            std::string leading_space = (tmpl == "mistral-v1" || tmpl_contains(" [INST]") ? " " : "");
+            std::string trailing_space = (tmpl == "mistral-v3-tekken" || tmpl_contains("\"[INST]\"") ? "" : " ");
+            bool trim_assistant_message = tmpl_contains("|trim + eos_token");
+            bool is_inside_turn = false;
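+            // e.g. for 'v3', a system + user + assistant chat renders as:
+            //   "[INST] sys\n\nuser[/INST] reply</s>"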
+            for (auto message : chat) {
+                if (!is_inside_turn) {
+                    ss << leading_space << "[INST]" << trailing_space;
+                    is_inside_turn = true;
+                }
+                std::string role(message->role);
+                std::string content(message->content);
+                if (role == "system") {
+                    ss << content << "\n\n";
+                } else if (role == "user") {
+                    ss << content << leading_space << "[/INST]";
                 } else {
-                    // if the model does not support system message, we still include it in the first message, but without <<SYS>>
-                    ss << content << "\n";
+                    ss << trailing_space << (trim_assistant_message ? trim(content) : content) << "</s>";
+                    is_inside_turn = false;
+                }
+            }
+        } else {
+            // llama2 template and its variants
+            // [variant] support system message
+            // See: https://huggingface.co/blog/llama2#how-to-prompt-llama-2
+            bool support_system_message = tmpl_contains("<<SYS>>") || tmpl == "llama2";
+            // [variant] space before + after response
+            bool space_around_response = tmpl_contains("' ' + eos_token");
+            // [variant] add BOS inside history
+            bool add_bos_inside_history = tmpl_contains("bos_token + '[INST]");
+            // [variant] trim spaces from the input message
+            bool strip_message = tmpl_contains("content.strip()");
+            // construct the prompt
+            bool is_inside_turn = true; // skip BOS at the beginning
+            ss << "[INST] ";
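+            // e.g. with <<SYS>> support, system + user render as:
+            //   "[INST] <<SYS>>\nsys\n<</SYS>>\n\nuser [/INST]"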
+            for (auto message : chat) {
+                std::string content = strip_message ? trim(message->content) : message->content;
+                std::string role(message->role);
+                if (!is_inside_turn) {
+                    is_inside_turn = true;
+                    ss << (add_bos_inside_history ? "<s>[INST] " : "[INST] ");
+                }
+                if (role == "system") {
+                    if (support_system_message) {
+                        ss << "<<SYS>>\n" << content << "\n<</SYS>>\n\n";
+                    } else {
+                        // if the model does not support system message, we still include it in the first message, but without <<SYS>>
+                        ss << content << "\n";
+                    }
+                } else if (role == "user") {
+                    ss << content << " [/INST]";
+                } else {
+                    ss << (space_around_response ? " " : "") << content << (space_around_response ? " " : "") << "</s>";
+                    is_inside_turn = false;
                 }
-            } else if (role == "user") {
-                ss << content << " [/INST]";
-            } else {
-                ss << (space_around_response ? " " : "") << content << (space_around_response ? " " : "") << "</s>";
-                is_inside_turn = false;
             }
+            // llama2 templates seem to not care about "add_generation_prompt"
         }
-        // llama2 templates seem to not care about "add_generation_prompt"
     } else if (tmpl == "phi3" || (tmpl_contains("<|assistant|>") && tmpl_contains("<|end|>"))) {
         // Phi 3
         for (auto message : chat) {