Skip to content

Commit f61910e

Browse files
committed
feat: support ds v3.2 encoding and remove redundant char in ds v3.1 jinja template
1 parent d4b6b59 commit f61910e

File tree

6 files changed

+782
-47
lines changed

6 files changed

+782
-47
lines changed

rtp_llm/models/deepseek_v2.py

Lines changed: 48 additions & 15 deletions
Original file line number | Diff line number | Diff line change
@@ -7,7 +7,6 @@
77
import torch
88

99
from rtp_llm.config.model_config import ModelConfig
10-
from rtp_llm.ops import MlaOpsType
1110
from rtp_llm.model_factory_register import register_model
1211
from rtp_llm.model_loader.attn_weight import MlaAttnAtomicWeight, MlaConfig
1312
from rtp_llm.model_loader.ffn_weight import (
@@ -29,6 +28,7 @@
2928
)
3029
from rtp_llm.models_py.model_desc.generic_moe import GenericMoeModel
3130
from rtp_llm.models_py.model_desc.module_base import GptModelBase
31+
from rtp_llm.ops import MlaOpsType
3232
from rtp_llm.utils.model_weight import (
3333
CkptWeightInfo,
3434
W,
@@ -73,7 +73,8 @@ def _get_hf_layer_weight_info(self, layer_id: int):
7373
kv_lora_rank=self.kv_lora_rank,
7474
ope_head_dim=self.nope_head_dim,
7575
v_head_dim=self.v_head_dim,
76-
use_mla=self.model_config.attn_config.use_mla and self.model_config.mla_ops_type != MlaOpsType.MHA,
76+
use_mla=self.model_config.attn_config.use_mla
77+
and self.model_config.mla_ops_type != MlaOpsType.MHA,
7778
q_use_lora=self.q_use_lora,
7879
)
7980
layer_weights = [
@@ -225,7 +226,10 @@ def _get_hf_layer_weight_info(self, layer_id: int):
225226
)
226227
)
227228

228-
if self.model_config.attn_config.use_mla and self.model_config.mla_ops_type != MlaOpsType.MHA:
229+
if (
230+
self.model_config.attn_config.use_mla
231+
and self.model_config.mla_ops_type != MlaOpsType.MHA
232+
):
229233
mla_layer_weights.append(
230234
MlaAttnAtomicWeight(
231235
W.mla_kc,
@@ -522,7 +526,7 @@ def _create_python_model(self) -> Optional[GptModelBase]:
522526
py_hw_kernel_config = self.hw_kernel_config
523527
moe_config = self.moe_config
524528
max_generate_batch_size = self.max_generate_batch_size
525-
529+
526530
# Use GenericMoeModel with new config architecture
527531
# attention_type is determined from model_config.attn_config.use_mla
528532
self.py_model = GenericMoeModel(
@@ -546,11 +550,13 @@ def _from_hf(config: ModelConfig, ckpt_path: str):
546550
config_json = json.loads(content)
547551
config.inter_size = config_json["intermediate_size"]
548552
config.attn_config.head_num = config_json["num_attention_heads"]
549-
config.attn_config.kv_head_num = config_json.get("num_key_value_heads", config.attn_config.head_num)
553+
config.attn_config.kv_head_num = config_json.get(
554+
"num_key_value_heads", config.attn_config.head_num
555+
)
550556
config.num_layers = config_json["num_hidden_layers"]
551-
config.attn_config.rope_config.base = int(config_json.get(
552-
"rope_theta", config.attn_config.rope_config.base
553-
))
557+
config.attn_config.rope_config.base = int(
558+
config_json.get("rope_theta", config.attn_config.rope_config.base)
559+
)
554560
config.vocab_size = config_json["vocab_size"]
555561
config.layernorm_eps = config_json.get("rms_norm_eps", 1e-06)
556562
config.tie_word_embeddings = config_json.get("tie_word_embeddings", False)
@@ -559,13 +565,19 @@ def _from_hf(config: ModelConfig, ckpt_path: str):
559565
# MLA config
560566
config.attn_config.use_mla = True
561567
q_lora_rank = config_json.get("q_lora_rank")
562-
config.attn_config.q_lora_rank = int(q_lora_rank) if q_lora_rank is not None else 0
568+
config.attn_config.q_lora_rank = (
569+
int(q_lora_rank) if q_lora_rank is not None else 0
570+
)
563571
kv_lora_rank = config_json.get("kv_lora_rank")
564-
config.attn_config.kv_lora_rank = int(kv_lora_rank) if kv_lora_rank is not None else 0
572+
config.attn_config.kv_lora_rank = (
573+
int(kv_lora_rank) if kv_lora_rank is not None else 0
574+
)
565575
config.attn_config.nope_head_dim = config_json["qk_nope_head_dim"]
566576
config.attn_config.rope_head_dim = config_json["qk_rope_head_dim"]
567577
config.attn_config.v_head_dim = config_json["v_head_dim"]
568-
config.attn_config.size_per_head = config.attn_config.nope_head_dim + config.attn_config.rope_head_dim
578+
config.attn_config.size_per_head = (
579+
config.attn_config.nope_head_dim + config.attn_config.rope_head_dim
580+
)
569581
config.attn_config.rope_config.dim = config.attn_config.rope_head_dim
570582

571583
# yarn rotary config
@@ -575,8 +587,12 @@ def _from_hf(config: ModelConfig, ckpt_path: str):
575587
config.attn_config.rope_config.style = 5
576588
rope_scaling = config_json.get("rope_scaling")
577589
config.attn_config.rope_config.scale = rope_scaling["factor"]
578-
config.attn_config.rope_config.factor1 = float(rope_scaling.get("beta_slow", 1))
579-
config.attn_config.rope_config.factor2 = float(rope_scaling.get("beta_fast", 32))
590+
config.attn_config.rope_config.factor1 = float(
591+
rope_scaling.get("beta_slow", 1)
592+
)
593+
config.attn_config.rope_config.factor2 = float(
594+
rope_scaling.get("beta_fast", 32)
595+
)
580596
config.attn_config.rope_config.max_pos = rope_scaling[
581597
"original_max_position_embeddings"
582598
]
@@ -636,8 +652,25 @@ def get_weight_cls():
636652

637653
class DeepSeekV3MtpWeight(DeepSeekV2Weight):
638654

639-
def __init__(self, model_config: ModelConfig, parallelism_config, hw_kernel_config, kv_cache_config, merge_lora: bool = False, vit_config=None, **kwargs):
640-
super().__init__(model_config=model_config, parallelism_config=parallelism_config, hw_kernel_config=hw_kernel_config, kv_cache_config=kv_cache_config, merge_lora=merge_lora, vit_config=vit_config, **kwargs)
655+
def __init__(
656+
self,
657+
model_config: ModelConfig,
658+
parallelism_config,
659+
hw_kernel_config,
660+
kv_cache_config,
661+
merge_lora: bool = False,
662+
vit_config=None,
663+
**kwargs,
664+
):
665+
super().__init__(
666+
model_config=model_config,
667+
parallelism_config=parallelism_config,
668+
hw_kernel_config=hw_kernel_config,
669+
kv_cache_config=kv_cache_config,
670+
merge_lora=merge_lora,
671+
vit_config=vit_config,
672+
**kwargs,
673+
)
641674

642675
def _get_weight_info(self):
643676
layer_weights: List[List[WeightModule]] = []

rtp_llm/openai/renderers/__init__.py

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -3,6 +3,7 @@
33
from .chatglm4_renderer import ChatGlm4Renderer
44
from .chatglm45_renderer import ChatGlm45Renderer
55
from .deepseekv31_renderer import DeepseekV31Renderer
6+
from .deepseekv32_renderer import DeepseekV32Renderer
67
from .internvl_renderer import InternVLRenderer
78
from .kimik2_renderer import KimiK2Renderer
89
from .llava_renderer import LlavaRenderer

rtp_llm/openai/renderers/deepseekv31_renderer.py

Lines changed: 25 additions & 27 deletions
Original file line number | Diff line number | Diff line change
@@ -36,80 +36,78 @@
3636
{%- endif %}
3737
{%- endif %}
3838
{%- endfor %}
39-
4039
{% if tools is defined and tools is not none %}
4140
{% set tool_ns = namespace(text='## Tools\nYou have access to the following tools:\n') %}
4241
{% for tool in tools %}
4342
{% set tool_ns.text = tool_ns.text + '\n### ' + tool.function.name + '\nDescription: ' + tool.function.description + '\n\nParameters: ' + (tool.function.parameters | tojson) + '\n' %}
4443
{% endfor %}
4544
{% set tool_ns.text = tool_ns.text + "\nIMPORTANT: ALWAYS adhere to this exact format for tool use:\n<|tool▁calls▁begin|><|tool▁call▁begin|>tool_call_name<|tool▁sep|>tool_call_arguments<|tool▁call▁end|>{{additional_tool_calls}}<|tool▁calls▁end|>\n\nWhere:\n\n- `tool_call_name` must be an exact match to one of the available tools\n- `tool_call_arguments` must be valid JSON that strictly follows the tool's Parameters Schema\n- For multiple tool calls, chain them directly without separators or spaces\n" %}
46-
{% set ns.system_prompt = ns.system_prompt + '\n\n' + tool_ns.text %}
45+
{% set ns.system_prompt = (ns.system_prompt + '\n\n' if ns.system_prompt else '') + tool_ns.text %}
4746
{% endif %}
48-
49-
{{ bos_token }}{{ ns.system_prompt }}
47+
{{- bos_token }}{{ ns.system_prompt }}
5048
{%- for message in messages %}
5149
{%- if message['role'] == 'user' %}
5250
{%- set ns.is_tool = false -%}
5351
{%- set ns.is_first = false -%}
5452
{%- set ns.is_last_user = true -%}
55-
{{'<|User|>' + message['content']}}
53+
{{- '<|User|>' + message['content'] -}}
5654
{%- endif %}
5755
{%- if message['role'] == 'assistant' and message['tool_calls'] is defined and message['tool_calls'] is not none %}
58-
{%- if ns.is_last_user %}
59-
{{'<|Assistant|></think>'}}
56+
{%- if ns.is_last_user -%}
57+
{{- '<|Assistant|></think>' -}}
6058
{%- endif %}
6159
{%- set ns.is_last_user = false -%}
6260
{%- set ns.is_first = false %}
6361
{%- set ns.is_tool = false -%}
6462
{%- for tool in message['tool_calls'] %}
6563
{%- if not ns.is_first %}
6664
{%- if message['content'] is none %}
67-
{{'<|tool▁calls▁begin|><|tool▁call▁begin|>'+ tool['function']['name'] + '<|tool▁sep|>' + tool['function']['arguments']|tojson + '<|tool▁call▁end|>'}}
65+
{{- '<|tool▁calls▁begin|><|tool▁call▁begin|>'+ tool['function']['name'] + '<|tool▁sep|>' + tool['function']['arguments']|tojson + '<|tool▁call▁end|>' -}}
6866
{%- else %}
69-
{{message['content'] + '<|tool▁calls▁begin|><|tool▁call▁begin|>' + tool['function']['name'] + '<|tool▁sep|>' + tool['function']['arguments']|tojson + '<|tool▁call▁end|>'}}
67+
{{- message['content'] + '<|tool▁calls▁begin|><|tool▁call▁begin|>' + tool['function']['name'] + '<|tool▁sep|>' + tool['function']['arguments']|tojson + '<|tool▁call▁end|>' -}}
7068
{%- endif %}
7169
{%- set ns.is_first = true -%}
7270
{%- else %}
73-
{{'<|tool▁call▁begin|>'+ tool['function']['name'] + '<|tool▁sep|>' + tool['function']['arguments']|tojson + '<|tool▁call▁end|>'}}
71+
{{- '<|tool▁call▁begin|>'+ tool['function']['name'] + '<|tool▁sep|>' + tool['function']['arguments']|tojson + '<|tool▁call▁end|>' -}}
7472
{%- endif %}
7573
{%- endfor %}
76-
{{'<|tool▁calls▁end|><|end▁of▁sentence|>'}}
74+
{{- '<|tool▁calls▁end|><|end▁of▁sentence|>' -}}
7775
{%- endif %}
7876
{%- if message['role'] == 'assistant' and (message['tool_calls'] is not defined or message['tool_calls'] is none) %}
79-
{%- if ns.is_last_user %}
80-
{{'<|Assistant|>'}}
81-
{%- if message['prefix'] is defined and message['prefix'] and thinking %}
82-
{{'<think>'}}
83-
{%- else %}
84-
{{'</think>'}}
77+
{%- if ns.is_last_user -%}
78+
{{- '<|Assistant|>' -}}
79+
{%- if message['prefix'] is defined and message['prefix'] and thinking -%}
80+
{{- '<think>' -}}
81+
{%- else -%}
82+
{{- '</think>' -}}
8583
{%- endif %}
8684
{%- endif %}
8785
{%- set ns.is_last_user = false -%}
8886
{%- if ns.is_tool %}
89-
{{message['content'] + '<|end▁of▁sentence|>'}}
87+
{{- message['content'] + '<|end▁of▁sentence|>' -}}
9088
{%- set ns.is_tool = false -%}
9189
{%- else %}
9290
{%- set content = message['content'] -%}
9391
{%- if '</think>' in content %}
9492
{%- set content = content.split('</think>', 1)[1] -%}
9593
{%- endif %}
96-
{{content + '<|end▁of▁sentence|>'}}
94+
{{- content + '<|end▁of▁sentence|>' -}}
9795
{%- endif %}
9896
{%- endif %}
9997
{%- if message['role'] == 'tool' %}
10098
{%- set ns.is_last_user = false -%}
10199
{%- set ns.is_tool = true -%}
102-
{{'<|tool▁output▁begin|>' + message['content'] + '<|tool▁output▁end|>'}}
100+
{{- '<|tool▁output▁begin|>' + message['content'] + '<|tool▁output▁end|>' -}}
103101
{%- endif %}
104102
{%- endfor -%}
105-
{%- if add_generation_prompt and ns.is_last_user and not ns.is_tool %}
106-
{{'<|Assistant|>'}}
107-
{%- if not thinking %}
108-
{{'</think>'}}
109-
{%- else %}
110-
{{'<think>'}}
103+
{%- if add_generation_prompt and ns.is_last_user and not ns.is_tool -%}
104+
{{- '<|Assistant|>' -}}
105+
{%- if not thinking -%}
106+
{{- '</think>' -}}
107+
{%- else -%}
108+
{{- '<think>' -}}
111109
{%- endif %}
112-
{% endif %}"""
110+
{%- endif %}"""
113111

114112

115113
class DeepseekV31Renderer(ReasoningToolBaseRenderer):

0 commit comments

Comments
 (0)