src/llama-chat.cpp (88 changes: 54 additions & 34 deletions)
@@ -26,39 +26,40 @@ static std::string trim(const std::string & str) {
 }
 
 static const std::map<std::string, llm_chat_template> LLM_CHAT_TEMPLATES = {
     { "chatml", LLM_CHAT_TEMPLATE_CHATML },
     { "llama2", LLM_CHAT_TEMPLATE_LLAMA_2 },
     { "llama2-sys", LLM_CHAT_TEMPLATE_LLAMA_2_SYS },
     { "llama2-sys-bos", LLM_CHAT_TEMPLATE_LLAMA_2_SYS_BOS },
     { "llama2-sys-strip", LLM_CHAT_TEMPLATE_LLAMA_2_SYS_STRIP },
     { "mistral-v1", LLM_CHAT_TEMPLATE_MISTRAL_V1 },
     { "mistral-v3", LLM_CHAT_TEMPLATE_MISTRAL_V3 },
     { "mistral-v3-tekken", LLM_CHAT_TEMPLATE_MISTRAL_V3_TEKKEN },
     { "mistral-v7", LLM_CHAT_TEMPLATE_MISTRAL_V7 },
     { "phi3", LLM_CHAT_TEMPLATE_PHI_3 },
     { "phi4", LLM_CHAT_TEMPLATE_PHI_4 },
     { "falcon3", LLM_CHAT_TEMPLATE_FALCON_3 },
     { "zephyr", LLM_CHAT_TEMPLATE_ZEPHYR },
     { "monarch", LLM_CHAT_TEMPLATE_MONARCH },
     { "gemma", LLM_CHAT_TEMPLATE_GEMMA },
     { "orion", LLM_CHAT_TEMPLATE_ORION },
     { "openchat", LLM_CHAT_TEMPLATE_OPENCHAT },
     { "vicuna", LLM_CHAT_TEMPLATE_VICUNA },
     { "vicuna-orca", LLM_CHAT_TEMPLATE_VICUNA_ORCA },
     { "deepseek", LLM_CHAT_TEMPLATE_DEEPSEEK },
     { "deepseek2", LLM_CHAT_TEMPLATE_DEEPSEEK_2 },
     { "deepseek3", LLM_CHAT_TEMPLATE_DEEPSEEK_3 },
+    { "deepseek-r1-distill", LLM_CHAT_TEMPLATE_DEEPSEEK_R1_DISTILL },
     { "command-r", LLM_CHAT_TEMPLATE_COMMAND_R },
     { "llama3", LLM_CHAT_TEMPLATE_LLAMA_3 },
     { "chatglm3", LLM_CHAT_TEMPLATE_CHATGML_3 },
     { "chatglm4", LLM_CHAT_TEMPLATE_CHATGML_4 },
     { "glmedge", LLM_CHAT_TEMPLATE_GLMEDGE },
     { "minicpm", LLM_CHAT_TEMPLATE_MINICPM },
     { "exaone3", LLM_CHAT_TEMPLATE_EXAONE_3 },
     { "rwkv-world", LLM_CHAT_TEMPLATE_RWKV_WORLD },
     { "granite", LLM_CHAT_TEMPLATE_GRANITE },
     { "gigachat", LLM_CHAT_TEMPLATE_GIGACHAT },
     { "megrez", LLM_CHAT_TEMPLATE_MEGREZ },
 };
 
 llm_chat_template llm_chat_template_from_str(const std::string & name) {
@@ -154,6 +155,8 @@ llm_chat_template llm_chat_detect_template(const std::string & tmpl) {
         return LLM_CHAT_TEMPLATE_MINICPM;
     } else if (tmpl_contains("'Assistant: ' + message['content'] + eos_token")) {
         return LLM_CHAT_TEMPLATE_DEEPSEEK_2;
+    } else if (tmpl_contains(LU8("<|Assistant|>")) && tmpl_contains(LU8("<|User|>")) && tmpl_contains(LU8("<|end▁of▁sentence|>")) && tmpl_contains(LU8("<|Assistant|><think>\\n"))) {
+        return LLM_CHAT_TEMPLATE_DEEPSEEK_R1_DISTILL;
     } else if (tmpl_contains(LU8("<|Assistant|>")) && tmpl_contains(LU8("<|User|>")) && tmpl_contains(LU8("<|end▁of▁sentence|>"))) {
         return LLM_CHAT_TEMPLATE_DEEPSEEK_3;
     } else if (tmpl_contains("[|system|]") && tmpl_contains("[|assistant|]") && tmpl_contains("[|endofturn|]")) {
@@ -492,7 +495,23 @@ int32_t llm_chat_apply_template(
         if (add_ass) {
             ss << LU8("<|Assistant|>");
         }
-    } else if (tmpl == LLM_CHAT_TEMPLATE_EXAONE_3) {
+    } else if (tmpl == LLM_CHAT_TEMPLATE_DEEPSEEK_R1_DISTILL) {
+        // DeepSeek-R1-Distill
+        ss << LU8("<|begin▁of▁sentence|>");
+        for (auto message : chat) {
+            std::string role(message->role);
+            if (role == "system") {
+                ss << message->content;
+            } else if (role == "user") {
+                ss << LU8("<|User|>") << message->content;
+            } else if (role == "assistant") {
+                ss << LU8("<|Assistant|>") << message->content << LU8("<|end▁of▁sentence|>");
+            }
+        }
+        if (add_ass) {
+            ss << LU8("<|Assistant|><think>\n");
+        }
+    } else if (tmpl == LLM_CHAT_TEMPLATE_EXAONE_3) {
         // ref: https://huggingface.co/LGAI-EXAONE/EXAONE-3.0-7.8B-Instruct/discussions/8#66bae61b1893d14ee8ed85bb
         // EXAONE-3.0-7.8B-Instruct
         for (auto message : chat) {
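Note: the raw `"<|begin▁of▁sentence|>"` literal was wrapped in `LU8()` for consistency with the other non-ASCII literals in this file, and the generation prompt now appends `<think>\n` so that the output matches the test expectations below. The new branch registers under the name `deepseek-r1-distill`, so callers can force it through the usual template-override path. A minimal sketch of such a call follows; it assumes the model-less `llama_chat_apply_template()` signature of this era (the public signature has changed across releases, so treat it as illustrative rather than exact):

```cpp
// Illustrative only, not part of this PR: format a short conversation with the
// new built-in template by passing its registered name as the override string.
#include <cstdio>
#include <vector>
#include "llama.h"

int main() {
    const std::vector<llama_chat_message> chat = {
        { "system", "You are a helpful assistant" },
        { "user",   "Hello" },
    };

    std::vector<char> buf(1024);
    int32_t n = llama_chat_apply_template("deepseek-r1-distill",
                                          chat.data(), chat.size(),
                                          /*add_ass=*/true,
                                          buf.data(), (int32_t) buf.size());
    if (n > (int32_t) buf.size()) {
        // the return value is the required size, so grow the buffer and retry
        buf.resize(n);
        n = llama_chat_apply_template("deepseek-r1-distill",
                                      chat.data(), chat.size(),
                                      true, buf.data(), (int32_t) buf.size());
    }
    if (n < 0) {
        return 1; // unknown or unsupported template
    }
    // Expected shape:
    // <|begin▁of▁sentence|>You are a helpful assistant<|User|>Hello<|Assistant|><think>\n
    printf("%.*s\n", n, buf.data());
    return 0;
}
```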
src/llama-chat.h (1 change: 1 addition & 0 deletions)

@@ -27,6 +27,7 @@ enum llm_chat_template {
     LLM_CHAT_TEMPLATE_DEEPSEEK,
     LLM_CHAT_TEMPLATE_DEEPSEEK_2,
     LLM_CHAT_TEMPLATE_DEEPSEEK_3,
+    LLM_CHAT_TEMPLATE_DEEPSEEK_R1_DISTILL,
     LLM_CHAT_TEMPLATE_COMMAND_R,
     LLM_CHAT_TEMPLATE_LLAMA_3,
     LLM_CHAT_TEMPLATE_CHATGML_3,
src/llama-vocab.cpp (18 changes: 16 additions & 2 deletions)
@@ -1781,7 +1781,7 @@ void llama_vocab::impl::load(llama_model_loader & ml, const LLM_KV & kv) {
                         || t.first == "<end_of_turn>"
                         || t.first == "<|endoftext|>"
                         || t.first == "<EOT>"
-                        || t.first == "<|end▁of▁sentence|>" // DeepSeek
+                        || t.first == "<|EOT|>" // DeepSeek-R1
                    ) {
                     special_eot_id = t.second;
                     if ((id_to_token[t.second].attr & LLAMA_TOKEN_ATTR_CONTROL) == 0) {
@@ -1791,7 +1791,19 @@ void llama_vocab::impl::load(llama_model_loader & ml, const LLM_KV & kv) {
                     }
                 }
             }
-
+            // find EOS token: "<|end▁of▁sentence|>", etc.
+            if (special_eos_id == LLAMA_TOKEN_NULL) {
+                if (false
+                        || t.first == "<|end▁of▁sentence|>" // DeepSeek
+                   ) {
+                    special_eos_id = t.second;
+                    if ((id_to_token[t.second].attr & LLAMA_TOKEN_ATTR_CONTROL) == 0) {
+                        LLAMA_LOG_WARN("%s: control-looking token: %6d '%s' was not control-type; this is probably a bug in the model. its type will be overridden\n",
+                                __func__, t.second, t.first.c_str());
+                        id_to_token[t.second].attr = LLAMA_TOKEN_ATTR_CONTROL;
+                    }
+                }
+            }
             // find EOM token: "<|eom_id|>"
             if (special_eom_id == LLAMA_TOKEN_NULL) {
                 if (false
@@ -1931,6 +1943,8 @@ void llama_vocab::impl::load(llama_model_loader & ml, const LLM_KV & kv) {
                     || t.first == "<|endoftext|>"
                     || t.first == "<|eom_id|>"
                     || t.first == "<EOT>"
+                    || t.first == "<|EOT|>" // DeepSeek
+                    || t.first == "<|end▁of▁sentence|>" // DeepSeek
                ) {
                 special_eog_ids.insert(t.second);
                 if ((id_to_token[t.second].attr & LLAMA_TOKEN_ATTR_CONTROL) == 0) {
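For context on the `special_eog_ids` additions: generation loops treat any token in the end-of-generation set as a stop signal, so registering `<|EOT|>` and `<|end▁of▁sentence|>` is what makes R1-Distill models actually terminate their turns. A hedged sketch of that consumer side is below; the two helper functions are hypothetical placeholders, and `llama_model_get_vocab()` plus `llama_vocab_is_eog()` are assumed from the refactored vocab API:

```cpp
#include "llama.h"

// Hypothetical placeholders standing in for a real sampling loop.
static llama_token sample_next_token(llama_context * /*ctx*/) { return 0; }
static void        emit_token(llama_token /*tok*/) {}

static void generate_until_eog(llama_model * model, llama_context * ctx) {
    const llama_vocab * vocab = llama_model_get_vocab(model);
    for (;;) {
        const llama_token tok = sample_next_token(ctx);
        if (llama_vocab_is_eog(vocab, tok)) {
            // tok landed in special_eog_ids, e.g. <|EOT|> for DeepSeek-R1
            break;
        }
        emit_token(tok);
    }
}
```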
tests/test-chat-template.cpp (9 changes: 9 additions & 0 deletions)

@@ -270,6 +270,15 @@ int main(void) {
             /* .bos_token= */ "",
             /* .eos_token= */ "",
         },
+        {
+            /* .name= */ "deepseek-ai/DeepSeek-R1-Distill-Qwen-14B",
+            /* .template_str= */ "{% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}{% set ns = namespace(is_first=false, is_tool=false, is_output_first=true, system_prompt='') %}{%- for message in messages %}{%- if message['role'] == 'system' %}{% set ns.system_prompt = message['content'] %}{%- endif %}{%- endfor %}{{bos_token}}{{ns.system_prompt}}{%- for message in messages %}{%- if message['role'] == 'user' %}{%- set ns.is_tool = false -%}{{'<|User|>' + message['content']}}{%- endif %}{%- if message['role'] == 'assistant' and message['content'] is none %}{%- set ns.is_tool = false -%}{%- for tool in message['tool_calls']%}{%- if not ns.is_first %}{{'<|Assistant|><|tool▁calls▁begin|><|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\\n' + '```json' + '\\n' + tool['function']['arguments'] + '\\n' + '```' + '<|tool▁call▁end|>'}}{%- set ns.is_first = true -%}{%- else %}{{'\\n' + '<|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\\n' + '```json' + '\\n' + tool['function']['arguments'] + '\\n' + '```' + '<|tool▁call▁end|>'}}{{'<|tool▁calls▁end|><|end▁of▁sentence|>'}}{%- endif %}{%- endfor %}{%- endif %}{%- if message['role'] == 'assistant' and message['content'] is not none %}{%- if ns.is_tool %}{{'<|tool▁outputs▁end|>' + message['content'] + '<|end▁of▁sentence|>'}}{%- set ns.is_tool = false -%}{%- else %}{% set content = message['content'] %}{% if '</think>' in content %}{% set content = content.split('</think>')[-1] %}{% endif %}{{'<|Assistant|>' + content + '<|end▁of▁sentence|>'}}{%- endif %}{%- endif %}{%- if message['role'] == 'tool' %}{%- set ns.is_tool = true -%}{%- if ns.is_output_first %}{{'<|tool▁outputs▁begin|><|tool▁output▁begin|>' + message['content'] + '<|tool▁output▁end|>'}}{%- set ns.is_output_first = false %}{%- else %}{{'\\n<|tool▁output▁begin|>' + message['content'] + '<|tool▁output▁end|>'}}{%- endif %}{%- endif %}{%- endfor -%}{% if ns.is_tool %}{{'<|tool▁outputs▁end|>'}}{% endif %}{% if add_generation_prompt and not ns.is_tool %}{{'<|Assistant|><think>\\n'}}{% endif %}",
+            /* .expected_output= */ "<|begin▁of▁sentence|>You are a helpful assistant<|User|>Hello<|Assistant|>Hi there<|end▁of▁sentence|><|User|>Who are you<|Assistant|> I am an assistant <|end▁of▁sentence|><|User|>Another question<|Assistant|><think>\n",
+            /* .expected_output_jinja= */ "<|begin▁of▁sentence|>You are a helpful assistant<|User|>Hello<|Assistant|>Hi there<|end▁of▁sentence|><|User|>Who are you<|Assistant|> I am an assistant <|end▁of▁sentence|><|User|>Another question<|Assistant|><think>\n",
+            /* .bos_token= */ "<|begin▁of▁sentence|>",
+            /* .eos_token= */ "<|end▁of▁sentence|>",
+            /* .supported_with_jinja= */ true, // requires additional_special_tokens as extra context
+        },
     };
     std::vector<char> formatted_chat(1024);
     int32_t res;