alibaba
diff --git a/rtp_llm/models/deepseek_v2.py b/rtp_llm/models/deepseek_v2.py
Lines changed: 48 additions & 15 deletions
diff --git a/rtp_llm/openai/renderers/__init__.py b/rtp_llm/openai/renderers/__init__.py
Lines changed: 1 addition & 0 deletions
diff --git a/rtp_llm/openai/renderers/deepseekv31_renderer.py b/rtp_llm/openai/renderers/deepseekv31_renderer.py
Lines changed: 25 additions & 27 deletions
@@ -7,7 +7,6 @@
 import torch
 
 from rtp_llm.config.model_config import ModelConfig
-from rtp_llm.ops import MlaOpsType
 from rtp_llm.model_factory_register import register_model
 from rtp_llm.model_loader.attn_weight import MlaAttnAtomicWeight, MlaConfig
 from rtp_llm.model_loader.ffn_weight import (
@@ -29,6 +28,7 @@
 )
 from rtp_llm.models_py.model_desc.generic_moe import GenericMoeModel
 from rtp_llm.models_py.model_desc.module_base import GptModelBase
+from rtp_llm.ops import MlaOpsType
 from rtp_llm.utils.model_weight import (
     CkptWeightInfo,
     W,
@@ -73,7 +73,8 @@ def _get_hf_layer_weight_info(self, layer_id: int):
             kv_lora_rank=self.kv_lora_rank,
             ope_head_dim=self.nope_head_dim,
             v_head_dim=self.v_head_dim,
-            use_mla=self.model_config.attn_config.use_mla and self.model_config.mla_ops_type != MlaOpsType.MHA,
+            use_mla=self.model_config.attn_config.use_mla
+            and self.model_config.mla_ops_type != MlaOpsType.MHA,
             q_use_lora=self.q_use_lora,
         )
         layer_weights = [
@@ -225,7 +226,10 @@ def _get_hf_layer_weight_info(self, layer_id: int):
                 )
             )
 
-        if self.model_config.attn_config.use_mla and self.model_config.mla_ops_type != MlaOpsType.MHA:
+        if (
+            self.model_config.attn_config.use_mla
+            and self.model_config.mla_ops_type != MlaOpsType.MHA
+        ):
             mla_layer_weights.append(
                 MlaAttnAtomicWeight(
                     W.mla_kc,
@@ -522,7 +526,7 @@ def _create_python_model(self) -> Optional[GptModelBase]:
         py_hw_kernel_config = self.hw_kernel_config
         moe_config = self.moe_config
         max_generate_batch_size = self.max_generate_batch_size
-        
+
         # Use GenericMoeModel with new config architecture
         # attention_type is determined from model_config.attn_config.use_mla
         self.py_model = GenericMoeModel(
@@ -546,11 +550,13 @@ def _from_hf(config: ModelConfig, ckpt_path: str):
             config_json = json.loads(content)
             config.inter_size = config_json["intermediate_size"]
             config.attn_config.head_num = config_json["num_attention_heads"]
-            config.attn_config.kv_head_num = config_json.get("num_key_value_heads", config.attn_config.head_num)
+            config.attn_config.kv_head_num = config_json.get(
+                "num_key_value_heads", config.attn_config.head_num
+            )
             config.num_layers = config_json["num_hidden_layers"]
-            config.attn_config.rope_config.base = int(config_json.get(
-                "rope_theta", config.attn_config.rope_config.base
-            ))
+            config.attn_config.rope_config.base = int(
+                config_json.get("rope_theta", config.attn_config.rope_config.base)
+            )
             config.vocab_size = config_json["vocab_size"]
             config.layernorm_eps = config_json.get("rms_norm_eps", 1e-06)
             config.tie_word_embeddings = config_json.get("tie_word_embeddings", False)
@@ -559,13 +565,19 @@ def _from_hf(config: ModelConfig, ckpt_path: str):
             # MLA config
             config.attn_config.use_mla = True
             q_lora_rank = config_json.get("q_lora_rank")
-            config.attn_config.q_lora_rank = int(q_lora_rank) if q_lora_rank is not None else 0
+            config.attn_config.q_lora_rank = (
+                int(q_lora_rank) if q_lora_rank is not None else 0
+            )
             kv_lora_rank = config_json.get("kv_lora_rank")
-            config.attn_config.kv_lora_rank = int(kv_lora_rank) if kv_lora_rank is not None else 0
+            config.attn_config.kv_lora_rank = (
+                int(kv_lora_rank) if kv_lora_rank is not None else 0
+            )
             config.attn_config.nope_head_dim = config_json["qk_nope_head_dim"]
             config.attn_config.rope_head_dim = config_json["qk_rope_head_dim"]
             config.attn_config.v_head_dim = config_json["v_head_dim"]
-            config.attn_config.size_per_head = config.attn_config.nope_head_dim + config.attn_config.rope_head_dim
+            config.attn_config.size_per_head = (
+                config.attn_config.nope_head_dim + config.attn_config.rope_head_dim
+            )
             config.attn_config.rope_config.dim = config.attn_config.rope_head_dim
 
             # yarn rotary config
@@ -575,8 +587,12 @@ def _from_hf(config: ModelConfig, ckpt_path: str):
                 config.attn_config.rope_config.style = 5
             rope_scaling = config_json.get("rope_scaling")
             config.attn_config.rope_config.scale = rope_scaling["factor"]
-            config.attn_config.rope_config.factor1 = float(rope_scaling.get("beta_slow", 1))
-            config.attn_config.rope_config.factor2 = float(rope_scaling.get("beta_fast", 32))
+            config.attn_config.rope_config.factor1 = float(
+                rope_scaling.get("beta_slow", 1)
+            )
+            config.attn_config.rope_config.factor2 = float(
+                rope_scaling.get("beta_fast", 32)
+            )
             config.attn_config.rope_config.max_pos = rope_scaling[
                 "original_max_position_embeddings"
             ]
@@ -636,8 +652,25 @@ def get_weight_cls():
 
 class DeepSeekV3MtpWeight(DeepSeekV2Weight):
 
-    def __init__(self, model_config: ModelConfig, parallelism_config, hw_kernel_config, kv_cache_config, merge_lora: bool = False, vit_config=None, **kwargs):
-        super().__init__(model_config=model_config, parallelism_config=parallelism_config, hw_kernel_config=hw_kernel_config, kv_cache_config=kv_cache_config, merge_lora=merge_lora, vit_config=vit_config, **kwargs)
+    def __init__(
+        self,
+        model_config: ModelConfig,
+        parallelism_config,
+        hw_kernel_config,
+        kv_cache_config,
+        merge_lora: bool = False,
+        vit_config=None,
+        **kwargs,
+    ):
+        super().__init__(
+            model_config=model_config,
+            parallelism_config=parallelism_config,
+            hw_kernel_config=hw_kernel_config,
+            kv_cache_config=kv_cache_config,
+            merge_lora=merge_lora,
+            vit_config=vit_config,
+            **kwargs,
+        )
 
     def _get_weight_info(self):
         layer_weights: List[List[WeightModule]] = []
 
@@ -3,6 +3,7 @@
 from .chatglm4_renderer import ChatGlm4Renderer
 from .chatglm45_renderer import ChatGlm45Renderer
 from .deepseekv31_renderer import DeepseekV31Renderer
+from .deepseekv32_renderer import DeepseekV32Renderer
 from .internvl_renderer import InternVLRenderer
 from .kimik2_renderer import KimiK2Renderer
 from .llava_renderer import LlavaRenderer
 
@@ -36,80 +36,78 @@
     {%- endif %}
   {%- endif %}
 {%- endfor %}
-
 {% if tools is defined and tools is not none %}
   {% set tool_ns = namespace(text='## Tools\nYou have access to the following tools:\n') %}
   {% for tool in tools %}
     {% set tool_ns.text = tool_ns.text + '\n### ' + tool.function.name + '\nDescription: ' + tool.function.description + '\n\nParameters: ' + (tool.function.parameters | tojson) + '\n' %}
   {% endfor %}
   {% set tool_ns.text = tool_ns.text + "\nIMPORTANT: ALWAYS adhere to this exact format for tool use:\n<｜tool▁calls▁begin｜><｜tool▁call▁begin｜>tool_call_name<｜tool▁sep｜>tool_call_arguments<｜tool▁call▁end｜>{{additional_tool_calls}}<｜tool▁calls▁end｜>\n\nWhere:\n\n- `tool_call_name` must be an exact match to one of the available tools\n- `tool_call_arguments` must be valid JSON that strictly follows the tool's Parameters Schema\n- For multiple tool calls, chain them directly without separators or spaces\n" %}
-  {% set ns.system_prompt = ns.system_prompt + '\n\n' + tool_ns.text %}
+  {% set ns.system_prompt = (ns.system_prompt + '\n\n' if ns.system_prompt else '') + tool_ns.text %}
 {% endif %}
-
-{{ bos_token }}{{ ns.system_prompt }}
+{{- bos_token }}{{ ns.system_prompt }}
 {%- for message in messages %}
   {%- if message['role'] == 'user' %}
     {%- set ns.is_tool = false -%}
     {%- set ns.is_first = false -%}
     {%- set ns.is_last_user = true -%}
-    {{'<｜User｜>' + message['content']}}
+    {{- '<｜User｜>' + message['content'] -}}
   {%- endif %}
   {%- if message['role'] == 'assistant' and message['tool_calls'] is defined and message['tool_calls'] is not none %}
-    {%- if ns.is_last_user %}
-      {{'<｜Assistant｜></think>'}}
+    {%- if ns.is_last_user -%}
+      {{- '<｜Assistant｜></think>' -}}
     {%- endif %}
     {%- set ns.is_last_user = false -%}
     {%- set ns.is_first = false %}
     {%- set ns.is_tool = false -%}
     {%- for tool in message['tool_calls'] %}
       {%- if not ns.is_first %}
         {%- if message['content'] is none %}
-          {{'<｜tool▁calls▁begin｜><｜tool▁call▁begin｜>'+ tool['function']['name'] + '<｜tool▁sep｜>' + tool['function']['arguments']|tojson + '<｜tool▁call▁end｜>'}}
+          {{- '<｜tool▁calls▁begin｜><｜tool▁call▁begin｜>'+ tool['function']['name'] + '<｜tool▁sep｜>' + tool['function']['arguments']|tojson + '<｜tool▁call▁end｜>' -}}
         {%- else %}
-          {{message['content'] + '<｜tool▁calls▁begin｜><｜tool▁call▁begin｜>' + tool['function']['name'] + '<｜tool▁sep｜>' + tool['function']['arguments']|tojson + '<｜tool▁call▁end｜>'}}
+          {{- message['content'] + '<｜tool▁calls▁begin｜><｜tool▁call▁begin｜>' + tool['function']['name'] + '<｜tool▁sep｜>' + tool['function']['arguments']|tojson + '<｜tool▁call▁end｜>' -}}
         {%- endif %}
         {%- set ns.is_first = true -%}
       {%- else %}
-        {{'<｜tool▁call▁begin｜>'+ tool['function']['name'] + '<｜tool▁sep｜>' + tool['function']['arguments']|tojson + '<｜tool▁call▁end｜>'}}
+        {{- '<｜tool▁call▁begin｜>'+ tool['function']['name'] + '<｜tool▁sep｜>' + tool['function']['arguments']|tojson + '<｜tool▁call▁end｜>' -}}
       {%- endif %}
     {%- endfor %}
-    {{'<｜tool▁calls▁end｜><｜end▁of▁sentence｜>'}}
+    {{- '<｜tool▁calls▁end｜><｜end▁of▁sentence｜>' -}}
   {%- endif %}
   {%- if message['role'] == 'assistant' and (message['tool_calls'] is not defined or message['tool_calls'] is none) %}
-    {%- if ns.is_last_user %}
-      {{'<｜Assistant｜>'}}
-      {%- if message['prefix'] is defined and message['prefix'] and thinking %}
-        {{'<think>'}}
-      {%- else %}
-        {{'</think>'}}
+    {%- if ns.is_last_user -%}
+      {{- '<｜Assistant｜>' -}}
+      {%- if message['prefix'] is defined and message['prefix'] and thinking -%}
+        {{- '<think>' -}}
+      {%- else -%}
+        {{- '</think>' -}}
       {%- endif %}
     {%- endif %}
     {%- set ns.is_last_user = false -%}
     {%- if ns.is_tool %}
-      {{message['content'] + '<｜end▁of▁sentence｜>'}}
+      {{- message['content'] + '<｜end▁of▁sentence｜>' -}}
       {%- set ns.is_tool = false -%}
     {%- else %}
       {%- set content = message['content'] -%}
       {%- if '</think>' in content %}
         {%- set content = content.split('</think>', 1)[1] -%}
       {%- endif %}
-      {{content + '<｜end▁of▁sentence｜>'}}
+      {{- content + '<｜end▁of▁sentence｜>' -}}
     {%- endif %}
   {%- endif %}
   {%- if message['role'] == 'tool' %}
     {%- set ns.is_last_user = false -%}
     {%- set ns.is_tool = true -%}
-    {{'<｜tool▁output▁begin｜>' + message['content'] + '<｜tool▁output▁end｜>'}}
+    {{- '<｜tool▁output▁begin｜>' + message['content'] + '<｜tool▁output▁end｜>' -}}
   {%- endif %}
 {%- endfor -%}
-{%- if add_generation_prompt and ns.is_last_user and not ns.is_tool %}
-  {{'<｜Assistant｜>'}}
-  {%- if not thinking %}
-    {{'</think>'}}
-  {%- else %}
-    {{'<think>'}}
+{%- if add_generation_prompt and ns.is_last_user and not ns.is_tool -%}
+  {{- '<｜Assistant｜>' -}}
+  {%- if not thinking -%}
+    {{- '</think>' -}}
+  {%- else -%}
+    {{- '<think>' -}}
   {%- endif %}
-{% endif %}"""
+{%- endif %}"""
 
 
 class DeepseekV31Renderer(ReasoningToolBaseRenderer):