Skip to content

Commit bcdcab2

Browse files
committed
update deepseek-v2
1 parent 3aa9f4c commit bcdcab2

File tree

1 file changed

+3
-3
lines changed

1 file changed

+3
-3
lines changed

paddlenlp/transformers/deepseek_v2/modeling.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1155,7 +1155,7 @@ def _get_name_mappings(cls, config: DeepseekV2Config) -> list[StateDictNameMappi
1155 1155
]
1156 1156
model_mappings.extend(layer_mappings)
1157 1157

1158-
# MoE paramerters
1158+
# MoE parameters
1159 1159
model_mappings.append([f"layers.{layer_index}.mlp.gate.weight", None, "transpose"])
1160 1160
for expert_idx in range(config.n_routed_experts):
1161 1161
expert_mappings = [
@@ -1170,10 +1170,10 @@ def _get_name_mappings(cls, config: DeepseekV2Config) -> list[StateDictNameMappi
1170 1170

1171 1171
init_name_mappings(mappings=model_mappings)
1172 1172
# base-model prefix "Qwen2MoEModel"
1173-
if "Qwen2Model" not in config.architectures:
1173+
if "DeepSeekV2Model" not in config.architectures:
1174 1174
for mapping in model_mappings:
1175 1175
mapping[0] = "model." + mapping[0]
1176-
mapping[1] = "deepseek_v2." + mapping[1]
1176+
mapping[1] = f"{cls.base_model_prefix}." + mapping[1]
1177 1177
if not config.tie_word_embeddings:
1178 1178
model_mappings.append(["lm_head.weight", "lm_head.weight", "transpose"])
1179 1179

0 commit comments

Comments (0)