File tree: 2 files changed, +2 -2 lines changed
paddlenlp/transformers/deepseek_v2: 2 files changed, +2 -2 lines changed
Original file line number | Diff line number | Diff line change
@@ -2572,7 +2572,7 @@ def forward(
2572
2572
hidden_states = self .hnorm (hidden_states )
2573
2573
nextn_hidden_state = self .enorm (nextn_hidden_state )
2574
2574
2575
- concat_h = paddle .concat ([hidden_states , nextn_hidden_state ], axis = - 1 )
2575
+ concat_h = paddle .concat ([nextn_hidden_state , hidden_states ], axis = - 1 )
2576
2576
hidden_states = LMHeadFunction .apply (concat_h , self .eh_proj .weight , False )
2577
2577
2578
2578
layer_outputs = super (DeepseekV2MTPLayer , self ).forward (
Original file line number | Diff line number | Diff line change
@@ -1814,7 +1814,7 @@ def attn_compute_for_fusion(self, args):
1814
1814
hidden_states = self .hnorm (hidden_states )
1815
1815
nextn_hidden_state = self .enorm (nextn_hidden_state )
1816
1816
1817
- hidden_states = self .eh_proj (paddle .concat ([hidden_states , nextn_hidden_state ], axis = - 1 ))
1817
+ hidden_states = self .eh_proj (paddle .concat ([nextn_hidden_state , hidden_states ], axis = - 1 ))
1818
1818
1819
1819
# attention compute
1820
1820
hidden_states , residual = self .self_attn_compute (hidden_states )
You can’t perform that action at this time.
0 commit comments