
Commit fe9e9f8

fix bug (#11006)
1 parent 6e6b373 commit fe9e9f8

2 files changed: +2 -2 lines changed

paddlenlp/transformers/deepseek_v2/modeling.py

Lines changed: 1 addition & 1 deletion
@@ -2572,7 +2572,7 @@ def forward(
      hidden_states = self.hnorm(hidden_states)
      nextn_hidden_state = self.enorm(nextn_hidden_state)

-     concat_h = paddle.concat([hidden_states, nextn_hidden_state], axis=-1)
+     concat_h = paddle.concat([nextn_hidden_state, hidden_states], axis=-1)
      hidden_states = LMHeadFunction.apply(concat_h, self.eh_proj.weight, False)

      layer_outputs = super(DeepseekV2MTPLayer, self).forward(
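
Why the order matters: eh_proj is a single linear projection over the concatenated pair, so its weight rows are tied to one fixed layout of the input. A minimal, self-contained Paddle sketch (hypothetical shapes and names, not the PaddleNLP code) showing that swapping the concat order changes the projection output unless the weight halves are swapped to match:

import paddle

hidden_size = 4
h = paddle.randn([1, hidden_size])  # stands in for hnorm(hidden_states)
e = paddle.randn([1, hidden_size])  # stands in for enorm(nextn_hidden_state)
w = paddle.randn([2 * hidden_size, hidden_size])  # stands in for eh_proj.weight

# The projection sees only one flat vector, so [h, e] and [e, h] hit
# different halves of w and generally give different outputs.
out_he = paddle.matmul(paddle.concat([h, e], axis=-1), w)
out_eh = paddle.matmul(paddle.concat([e, h], axis=-1), w)
print(paddle.allclose(out_he, out_eh))  # almost surely False for a random w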

paddlenlp/transformers/deepseek_v2/modeling_pp.py

Lines changed: 1 addition & 1 deletion
@@ -1814,7 +1814,7 @@ def attn_compute_for_fusion(self, args):
      hidden_states = self.hnorm(hidden_states)
      nextn_hidden_state = self.enorm(nextn_hidden_state)

-     hidden_states = self.eh_proj(paddle.concat([hidden_states, nextn_hidden_state], axis=-1))
+     hidden_states = self.eh_proj(paddle.concat([nextn_hidden_state, hidden_states], axis=-1))

      # attention compute
      hidden_states, residual = self.self_attn_compute(hidden_states)
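
Both files feed the same eh_proj weight, one through the Linear layer (modeling_pp.py) and one through LMHeadFunction.apply with eh_proj.weight (modeling.py), so after this change both paths use the same [nextn_hidden_state, hidden_states] order. A small hedged sketch (assumed shapes; a plain matmul stands in for LMHeadFunction.apply) of how the two call styles agree once the concat order matches:

import paddle
import paddle.nn as nn

hidden_size = 4
eh_proj = nn.Linear(2 * hidden_size, hidden_size, bias_attr=False)

h = paddle.randn([1, hidden_size])  # stands in for hnorm(hidden_states)
e = paddle.randn([1, hidden_size])  # stands in for enorm(nextn_hidden_state)
concat_h = paddle.concat([e, h], axis=-1)  # the order both files now use

via_layer = eh_proj(concat_h)                         # modeling_pp.py style call
via_weight = paddle.matmul(concat_h, eh_proj.weight)  # stand-in for LMHeadFunction.apply
print(paddle.allclose(via_layer, via_weight))  # True: same order, same weight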
