
Commit b8c5d3e

[Python][Model] Add ministral3_reasoning template (#3419)
1 parent 1a23ff5 commit b8c5d3e

4 files changed: +41 -1 lines

python/mlc_llm/conversation_template/__init__.py (1 addition, 0 deletions)

@@ -17,6 +17,7 @@
     llama,
     llava,
     ministral3,
+    ministral3_reasoning,
     mistral,
     nemotron,
     oasst,
python/mlc_llm/conversation_template/ministral3_reasoning.py (new file; 38 additions, 0 deletions)

@@ -0,0 +1,38 @@
+"""Ministral3 reasoning templates"""
+
+from mlc_llm.protocol.conversation_protocol import Conversation, MessagePlaceholders
+
+from .registry import ConvTemplateRegistry
+
+# Ministral-3-XB-Reasoning-2512
+ConvTemplateRegistry.register_conv_template(
+    Conversation(
+        name="ministral3_reasoning",
+        system_template=(
+            f"[SYSTEM_PROMPT]{MessagePlaceholders.SYSTEM.value}[/SYSTEM_PROMPT]"
+            f"{MessagePlaceholders.FUNCTION.value}"
+        ),
+        system_message=(
+            "# HOW YOU SHOULD THINK AND ANSWER\n\n"
+            "First draft your thinking process (inner monologue) until you arrive at a response. "
+            "Format your response using Markdown, and use LaTeX for any mathematical equations. "
+            "Write both your thoughts and the response in the same language as the input.\n\n"
+            "Your thinking process must follow the template below:"
+            "[THINK]Your thoughts or/and draft, like working through an exercise on scratch paper. "
+            "Be as casual and as long as you want until you are confident to generate the response "
+            "to the user.[/THINK]Here, provide a self-contained response."
+        ),
+        role_templates={
+            "user": f"[INST]{MessagePlaceholders.USER.value}[/INST]",
+            "assistant": f"{MessagePlaceholders.ASSISTANT.value}</s>",
+            "tool": f"[TOOL_RESULTS]{MessagePlaceholders.TOOL.value}[/TOOL_RESULTS]",
+        },
+        roles={"user": "", "assistant": "", "tool": ""},
+        seps=[""],
+        role_content_sep="",
+        role_empty_sep="",
+        stop_str=["</s>"],
+        stop_token_ids=[2],
+        system_prefix_token_ids=[1],
+    )
+)
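For reference, this template implies the following raw wire format for a single user turn. The sketch below assembles it by hand, with plain strings standing in for the placeholder substitutions the engine performs; it is a minimal illustration (the abridged system message and the example user message are assumptions), not MLC's actual prompt-building code path.

```python
# Hand-rolled sketch of the prompt the ministral3_reasoning template produces
# for one user turn. Plain f-strings stand in for MessagePlaceholders, and the
# system message is abridged for brevity.
system_message = "# HOW YOU SHOULD THINK AND ANSWER\n\n..."  # abridged
user_message = "What is 12 * 13?"  # example input

prompt = (
    # system_template: wrap the system message; the function slot after it is empty here
    f"[SYSTEM_PROMPT]{system_message}[/SYSTEM_PROMPT]"
    # role_templates["user"]: wrap the user turn in [INST]...[/INST]
    f"[INST]{user_message}[/INST]"
)
print(prompt)

# The model is then expected to reply in the form
#   [THINK]...inner monologue...[/THINK]<final answer></s>
# Generation stops at "</s>" (stop_str, token id 2 in stop_token_ids), and the
# prompt is prefixed with BOS token id 1 (system_prefix_token_ids).
```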

python/mlc_llm/interface/gen_config.py (1 addition, 0 deletions)

@@ -310,6 +310,7 @@ def gen_config( # pylint: disable=too-many-locals,too-many-arguments,too-many-b
     "llama-2",
     "mistral_default",
     "ministral3",
+    "ministral3_reasoning",
     "gpt2",
     "codellama_completion",
     "codellama_instruct",
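With this entry, `ministral3_reasoning` becomes a valid conversation-template choice when generating a model config (for example, `mlc_llm gen_config <model> --conv-template ministral3_reasoning -o <out>`). A quick way to sanity-check the registration is sketched below; it assumes the `ConvTemplateRegistry.get_conv_template` lookup used elsewhere in the codebase, and that importing the package runs the registration side effects.

```python
# Sanity check: the template should be discoverable by name after import.
import mlc_llm.conversation_template  # noqa: F401  # side effect: registers all templates
from mlc_llm.conversation_template.registry import ConvTemplateRegistry

conv = ConvTemplateRegistry.get_conv_template("ministral3_reasoning")
assert conv is not None, "ministral3_reasoning was not registered"
print(conv.name)      # ministral3_reasoning
print(conv.stop_str)  # ['</s>']
```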

python/mlc_llm/model/model_preset.py (1 addition, 1 deletion)

@@ -250,7 +250,7 @@
         "use_cache": True,
         "vocab_size": 32768,
     },
-    "ministral3_3b_reasoning_2512": {
+    "ministral3_3b_2512": {
         "architectures": ["Mistral3ForConditionalGeneration"],
         "dtype": "bfloat16",
         "image_token_index": 10,
