diff --git a/src/llama-chat.cpp b/src/llama-chat.cpp
index de44586967d71..1682cd6393140 100644
--- a/src/llama-chat.cpp
+++ b/src/llama-chat.cpp
@@ -26,39 +26,40 @@ static std::string trim(const std::string & str) {
 }
 
 static const std::map<std::string, llm_chat_template> LLM_CHAT_TEMPLATES = {
-    { "chatml",            LLM_CHAT_TEMPLATE_CHATML            },
-    { "llama2",            LLM_CHAT_TEMPLATE_LLAMA_2           },
-    { "llama2-sys",        LLM_CHAT_TEMPLATE_LLAMA_2_SYS       },
-    { "llama2-sys-bos",    LLM_CHAT_TEMPLATE_LLAMA_2_SYS_BOS   },
-    { "llama2-sys-strip",  LLM_CHAT_TEMPLATE_LLAMA_2_SYS_STRIP },
-    { "mistral-v1",        LLM_CHAT_TEMPLATE_MISTRAL_V1        },
-    { "mistral-v3",        LLM_CHAT_TEMPLATE_MISTRAL_V3        },
-    { "mistral-v3-tekken", LLM_CHAT_TEMPLATE_MISTRAL_V3_TEKKEN },
-    { "mistral-v7",        LLM_CHAT_TEMPLATE_MISTRAL_V7        },
-    { "phi3",              LLM_CHAT_TEMPLATE_PHI_3             },
-    { "phi4",              LLM_CHAT_TEMPLATE_PHI_4             },
-    { "falcon3",           LLM_CHAT_TEMPLATE_FALCON_3          },
-    { "zephyr",            LLM_CHAT_TEMPLATE_ZEPHYR            },
-    { "monarch",           LLM_CHAT_TEMPLATE_MONARCH           },
-    { "gemma",             LLM_CHAT_TEMPLATE_GEMMA             },
-    { "orion",             LLM_CHAT_TEMPLATE_ORION             },
-    { "openchat",          LLM_CHAT_TEMPLATE_OPENCHAT          },
-    { "vicuna",            LLM_CHAT_TEMPLATE_VICUNA            },
-    { "vicuna-orca",       LLM_CHAT_TEMPLATE_VICUNA_ORCA       },
-    { "deepseek",          LLM_CHAT_TEMPLATE_DEEPSEEK          },
-    { "deepseek2",         LLM_CHAT_TEMPLATE_DEEPSEEK_2        },
-    { "deepseek3",         LLM_CHAT_TEMPLATE_DEEPSEEK_3        },
-    { "command-r",         LLM_CHAT_TEMPLATE_COMMAND_R         },
-    { "llama3",            LLM_CHAT_TEMPLATE_LLAMA_3           },
-    { "chatglm3",          LLM_CHAT_TEMPLATE_CHATGML_3         },
-    { "chatglm4",          LLM_CHAT_TEMPLATE_CHATGML_4         },
-    { "glmedge",           LLM_CHAT_TEMPLATE_GLMEDGE           },
-    { "minicpm",           LLM_CHAT_TEMPLATE_MINICPM           },
-    { "exaone3",           LLM_CHAT_TEMPLATE_EXAONE_3          },
-    { "rwkv-world",        LLM_CHAT_TEMPLATE_RWKV_WORLD        },
-    { "granite",           LLM_CHAT_TEMPLATE_GRANITE           },
-    { "gigachat",          LLM_CHAT_TEMPLATE_GIGACHAT          },
-    { "megrez",            LLM_CHAT_TEMPLATE_MEGREZ            },
+    { "chatml",              LLM_CHAT_TEMPLATE_CHATML              },
+    { "llama2",              LLM_CHAT_TEMPLATE_LLAMA_2             },
+    { "llama2-sys",          LLM_CHAT_TEMPLATE_LLAMA_2_SYS         },
+    { "llama2-sys-bos",      LLM_CHAT_TEMPLATE_LLAMA_2_SYS_BOS     },
+    { "llama2-sys-strip",    LLM_CHAT_TEMPLATE_LLAMA_2_SYS_STRIP   },
+    { "mistral-v1",          LLM_CHAT_TEMPLATE_MISTRAL_V1          },
+    { "mistral-v3",          LLM_CHAT_TEMPLATE_MISTRAL_V3          },
+    { "mistral-v3-tekken",   LLM_CHAT_TEMPLATE_MISTRAL_V3_TEKKEN   },
+    { "mistral-v7",          LLM_CHAT_TEMPLATE_MISTRAL_V7          },
+    { "phi3",                LLM_CHAT_TEMPLATE_PHI_3               },
+    { "phi4",                LLM_CHAT_TEMPLATE_PHI_4               },
+    { "falcon3",             LLM_CHAT_TEMPLATE_FALCON_3            },
+    { "zephyr",              LLM_CHAT_TEMPLATE_ZEPHYR              },
+    { "monarch",             LLM_CHAT_TEMPLATE_MONARCH             },
+    { "gemma",               LLM_CHAT_TEMPLATE_GEMMA               },
+    { "orion",               LLM_CHAT_TEMPLATE_ORION               },
+    { "openchat",            LLM_CHAT_TEMPLATE_OPENCHAT            },
+    { "vicuna",              LLM_CHAT_TEMPLATE_VICUNA              },
+    { "vicuna-orca",         LLM_CHAT_TEMPLATE_VICUNA_ORCA         },
+    { "deepseek",            LLM_CHAT_TEMPLATE_DEEPSEEK            },
+    { "deepseek2",           LLM_CHAT_TEMPLATE_DEEPSEEK_2          },
+    { "deepseek3",           LLM_CHAT_TEMPLATE_DEEPSEEK_3          },
+    { "deepseek-r1-distill", LLM_CHAT_TEMPLATE_DEEPSEEK_R1_DISTILL },
+    { "command-r",           LLM_CHAT_TEMPLATE_COMMAND_R           },
+    { "llama3",              LLM_CHAT_TEMPLATE_LLAMA_3             },
+    { "chatglm3",            LLM_CHAT_TEMPLATE_CHATGML_3           },
+    { "chatglm4",            LLM_CHAT_TEMPLATE_CHATGML_4           },
+    { "glmedge",             LLM_CHAT_TEMPLATE_GLMEDGE             },
+    { "minicpm",             LLM_CHAT_TEMPLATE_MINICPM             },
+    { "exaone3",             LLM_CHAT_TEMPLATE_EXAONE_3            },
+    { "rwkv-world",          LLM_CHAT_TEMPLATE_RWKV_WORLD          },
+    { "granite",             LLM_CHAT_TEMPLATE_GRANITE             },
+    { "gigachat",            LLM_CHAT_TEMPLATE_GIGACHAT            },
+    { "megrez",              LLM_CHAT_TEMPLATE_MEGREZ              },
 };
 
 llm_chat_template llm_chat_template_from_str(const std::string & name) {
@@ -154,6 +155,8 @@ llm_chat_template llm_chat_detect_template(const std::string & tmpl) {
         return LLM_CHAT_TEMPLATE_MINICPM;
     } else if (tmpl_contains("'Assistant: ' + message['content'] + eos_token")) {
         return LLM_CHAT_TEMPLATE_DEEPSEEK_2;
+    } else if (tmpl_contains(LU8("<|Assistant|>")) && tmpl_contains(LU8("<|User|>")) && tmpl_contains(LU8("<|end▁of▁sentence|>")) && tmpl_contains(LU8("<|Assistant|>\\n"))) {
+        return LLM_CHAT_TEMPLATE_DEEPSEEK_R1_DISTILL;
     } else if (tmpl_contains(LU8("<|Assistant|>")) && tmpl_contains(LU8("<|User|>")) && tmpl_contains(LU8("<|end▁of▁sentence|>"))) {
         return LLM_CHAT_TEMPLATE_DEEPSEEK_3;
     } else if (tmpl_contains("[|system|]") && tmpl_contains("[|assistant|]") && tmpl_contains("[|endofturn|]")) {
@@ -492,7 +495,24 @@ int32_t llm_chat_apply_template(
         if (add_ass) {
             ss << LU8("<|Assistant|>");
         }
-    } else if (tmpl == LLM_CHAT_TEMPLATE_EXAONE_3) {
+    } else if (tmpl == LLM_CHAT_TEMPLATE_DEEPSEEK_R1_DISTILL) {
+        // DeepSeek-R1-Distill: same turn markers as DeepSeek-V3, but the
+        // generation prompt ends with a newline after <|Assistant|>
+        ss << LU8("<|begin▁of▁sentence|>");
+        for (auto message : chat) {
+            std::string role(message->role);
+            if (role == "system") {
+                ss << message->content;
+            } else if (role == "user") {
+                ss << LU8("<|User|>") << message->content;
+            } else if (role == "assistant") {
+                ss << LU8("<|Assistant|>") << message->content << LU8("<|end▁of▁sentence|>");
+            }
+        }
+        if (add_ass) {
+            ss << LU8("<|Assistant|>") << "\n";
+        }
+    } else if (tmpl == LLM_CHAT_TEMPLATE_EXAONE_3) {
         // ref: https://huggingface.co/LGAI-EXAONE/EXAONE-3.0-7.8B-Instruct/discussions/8#66bae61b1893d14ee8ed85bb
         // EXAONE-3.0-7.8B-Instruct
         for (auto message : chat) {
diff --git a/src/llama-chat.h b/src/llama-chat.h
index 2f6a0e3e28266..b27061e2f21f2 100644
--- a/src/llama-chat.h
+++ b/src/llama-chat.h
@@ -27,6 +27,7 @@ enum llm_chat_template {
     LLM_CHAT_TEMPLATE_DEEPSEEK,
     LLM_CHAT_TEMPLATE_DEEPSEEK_2,
     LLM_CHAT_TEMPLATE_DEEPSEEK_3,
+    LLM_CHAT_TEMPLATE_DEEPSEEK_R1_DISTILL,
     LLM_CHAT_TEMPLATE_COMMAND_R,
     LLM_CHAT_TEMPLATE_LLAMA_3,
     LLM_CHAT_TEMPLATE_CHATGML_3,
diff --git a/src/llama-vocab.cpp b/src/llama-vocab.cpp
index a708d8b88c8c7..548c4f59834c0 100644
--- a/src/llama-vocab.cpp
+++ b/src/llama-vocab.cpp
@@ -1781,7 +1781,7 @@ void llama_vocab::impl::load(llama_model_loader & ml, const LLM_KV & kv) {
                         || t.first == "<end_of_turn>"
                         || t.first == "<|endoftext|>"
                         || t.first == "<EOT>"
-                        || t.first == "<|end▁of▁sentence|>" // DeepSeek
+                        || t.first == "<|EOT|>" // DeepSeek-R1
                    ) {
                     special_eot_id = t.second;
                     if ((id_to_token[t.second].attr & LLAMA_TOKEN_ATTR_CONTROL) == 0) {
@@ -1791,7 +1791,19 @@ void llama_vocab::impl::load(llama_model_loader & ml, const LLM_KV & kv) {
                     }
                 }
             }
-
+            // find EOS token: "<|end▁of▁sentence|>", etc. (DeepSeek)
+            if (special_eos_id == LLAMA_TOKEN_NULL) {
+                if (false
+                        || t.first == "<|end▁of▁sentence|>" // DeepSeek
+                   ) {
+                    special_eos_id = t.second;
+                    if ((id_to_token[t.second].attr & LLAMA_TOKEN_ATTR_CONTROL) == 0) {
+                        LLAMA_LOG_WARN("%s: control-looking token: %6d '%s' was not control-type; this is probably a bug in the model. its type will be overridden\n",
+                                __func__, t.second, t.first.c_str());
+                        id_to_token[t.second].attr = LLAMA_TOKEN_ATTR_CONTROL;
+                    }
+                }
+            }
             // find EOM token: "<|eom_id|>"
             if (special_eom_id == LLAMA_TOKEN_NULL) {
                 if (false
@@ -1931,6 +1943,8 @@ void llama_vocab::impl::load(llama_model_loader & ml, const LLM_KV & kv) {
                         || t.first == "<|endoftext|>"
                         || t.first == "<|eom_id|>"
                         || t.first == "<EOT>"
+                        || t.first == "<|EOT|>"             // DeepSeek-R1
+                        || t.first == "<|end▁of▁sentence|>" // DeepSeek
                    ) {
                     special_eog_ids.insert(t.second);
                     if ((id_to_token[t.second].attr & LLAMA_TOKEN_ATTR_CONTROL) == 0) {
diff --git a/tests/test-chat-template.cpp b/tests/test-chat-template.cpp
index 9231c517afb0b..abd8628bed546 100644
--- a/tests/test-chat-template.cpp
+++ b/tests/test-chat-template.cpp
@@ -270,6 +270,15 @@ int main(void) {
             /* .bos_token= */ "",
             /* .eos_token= */ "",
         },
+        {
+            /* .name= */ "deepseek-ai/DeepSeek-R1-Distill-Qwen-14B",
+            /* .template_str= */ "{% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}{% set ns = namespace(is_first=false, is_tool=false, is_output_first=true, system_prompt='') %}{%- for message in messages %}{%- if message['role'] == 'system' %}{% set ns.system_prompt = message['content'] %}{%- endif %}{%- endfor %}{{bos_token}}{{ns.system_prompt}}{%- for message in messages %}{%- if message['role'] == 'user' %}{%- set ns.is_tool = false -%}{{'<|User|>' + message['content']}}{%- endif %}{%- if message['role'] == 'assistant' and message['content'] is none %}{%- set ns.is_tool = false -%}{%- for tool in message['tool_calls']%}{%- if not ns.is_first %}{{'<|Assistant|><|tool▁calls▁begin|><|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\\n' + '```json' + '\\n' + tool['function']['arguments'] + '\\n' + '```' + '<|tool▁call▁end|>'}}{%- set ns.is_first = true -%}{%- else %}{{'\\n' + '<|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\\n' + '```json' + '\\n' + tool['function']['arguments'] + '\\n' + '```' + '<|tool▁call▁end|>'}}{{'<|tool▁calls▁end|><|end▁of▁sentence|>'}}{%- endif %}{%- endfor %}{%- endif %}{%- if message['role'] == 'assistant' and message['content'] is not none %}{%- if ns.is_tool %}{{'<|tool▁outputs▁end|>' + message['content'] + '<|end▁of▁sentence|>'}}{%- set ns.is_tool = false -%}{%- else %}{% set content = message['content'] %}{% if '</think>' in content %}{% set content = content.split('</think>')[-1] %}{% endif %}{{'<|Assistant|>' + content + '<|end▁of▁sentence|>'}}{%- endif %}{%- endif %}{%- if message['role'] == 'tool' %}{%- set ns.is_tool = true -%}{%- if ns.is_output_first %}{{'<|tool▁outputs▁begin|><|tool▁output▁begin|>' + message['content'] + '<|tool▁output▁end|>'}}{%- set ns.is_output_first = false %}{%- else %}{{'\\n<|tool▁output▁begin|>' + message['content'] + '<|tool▁output▁end|>'}}{%- endif %}{%- endif %}{%- endfor -%}{% if ns.is_tool %}{{'<|tool▁outputs▁end|>'}}{% endif %}{% if add_generation_prompt and not ns.is_tool %}{{'<|Assistant|>\\n'}}{% endif %}",
+            /* .expected_output= */ "<|begin▁of▁sentence|>You are a helpful assistant<|User|>Hello<|Assistant|>Hi there<|end▁of▁sentence|><|User|>Who are you<|Assistant|> I am an assistant <|end▁of▁sentence|><|User|>Another question<|Assistant|>\n",
+            /* .expected_output_jinja= */ "<|begin▁of▁sentence|>You are a helpful assistant<|User|>Hello<|Assistant|>Hi there<|end▁of▁sentence|><|User|>Who are you<|Assistant|> I am an assistant <|end▁of▁sentence|><|User|>Another question<|Assistant|>\n",
+            /* .bos_token= */ "<|begin▁of▁sentence|>",
"<|begin▁of▁sentence|>", + /* .eos_token= */ "<|end▁of▁sentence|>", + /* .supported_with_jinja= */ true, // Requires additional_special_tokens as extra context + }, }; std::vector formatted_chat(1024); int32_t res;