1 file changed: paddlenlp/transformers/deepseek_v2 (+2 −2)

@@ -170,15 +170,15 @@ def forward_without_residual(self, inputs):
         with paddle.no_grad():
             if self.shared_experts is not None:
                 if self.using_post_norm_recompute:
-                    shared_expert_output = fp8_mlp_fwd_norm_rc(
+                    shared_expert_output = FP8LinearFunctionBase.fp8_mlp_fwd_norm_rc(
                         hidden_states,
                         self.shared_experts.norm_weight,
                         self.shared_experts.norm_eps,
                         self.shared_experts.w1,
                         self.shared_experts.w2,
                     )
                 else:
-                    shared_expert_output = fp8_mlp_fwd(hidden_states, self.shared_experts.w1, self.shared_experts.w2)
+                    shared_expert_output = FP8LinearFunctionBase.fp8_mlp_fwd(hidden_states, self.shared_experts.w1, self.shared_experts.w2)
                 residual = residual + shared_expert_output

         self.x = hidden_states
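
For context, below is a minimal, runnable sketch of the call pattern this change moves to: fp8_mlp_fwd and fp8_mlp_fwd_norm_rc invoked as static methods of FP8LinearFunctionBase rather than as module-level functions. Only the method names and argument order are taken from the diff; the class body, the math inside it, and the toy shapes are hypothetical placeholders, not the PaddleNLP implementation.

# Sketch only: stand-in for FP8LinearFunctionBase showing the two helpers
# used at the changed call sites. The real methods run FP8 kernels; here
# plain float math is used purely to keep the example self-contained.
import paddle


class FP8LinearFunctionBase:
    @staticmethod
    def fp8_mlp_fwd(hidden_states, w1, w2):
        # Placeholder two-layer MLP (the real method uses FP8 GEMMs).
        return paddle.matmul(paddle.nn.functional.silu(paddle.matmul(hidden_states, w1)), w2)

    @staticmethod
    def fp8_mlp_fwd_norm_rc(hidden_states, norm_weight, norm_eps, w1, w2):
        # Placeholder RMSNorm followed by the MLP above (the real method fuses
        # the norm with the FP8 MLP and recomputes it in backward).
        variance = hidden_states.pow(2).mean(axis=-1, keepdim=True)
        normed = hidden_states * paddle.rsqrt(variance + norm_eps) * norm_weight
        return FP8LinearFunctionBase.fp8_mlp_fwd(normed, w1, w2)


# Call sites mirroring the changed lines in forward_without_residual,
# with made-up tensor shapes for illustration.
hidden_states = paddle.randn([4, 8])
w1, w2 = paddle.randn([8, 16]), paddle.randn([16, 8])
norm_weight, norm_eps = paddle.ones([8]), 1e-6

out_norm_rc = FP8LinearFunctionBase.fp8_mlp_fwd_norm_rc(hidden_states, norm_weight, norm_eps, w1, w2)
out_plain = FP8LinearFunctionBase.fp8_mlp_fwd(hidden_states, w1, w2)
print(out_norm_rc.shape, out_plain.shape)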