
Commit 298aac8

fix ernie4.5 moe bug (#2689)
1 parent 01597fc commit 298aac8

3 files changed, +10 -4 lines changed


paddleformers/nn/moe/moe_allgather_layer.py

Lines changed: 2 additions & 2 deletions

@@ -714,9 +714,9 @@ def forward_experts(self, *dispatched_input):
         else:
             input_shape = [
                 1,
-                true_experts[iexpert].down_proj.lora_A.shape[1],
+                true_experts[iexpert].down_proj.lora_B.shape[1],
             ]
-            input_dtype = true_experts[iexpert].down_proj.lora_A.dtype
+            input_dtype = true_experts[iexpert].down_proj.lora_B.dtype
 
             chunk = paddle.zeros(
                 input_shape,

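Context for the first change: in a standard LoRA decomposition of down_proj, lora_A maps the input width to the LoRA rank and lora_B maps the rank to the output width, so a placeholder chunk sized from lora_A would take the rank as its width instead of the expert's output width. A minimal sketch of that shape relationship, using hypothetical dimensions and plain NumPy rather than the actual ernie4.5 expert module:

import numpy as np

# Hypothetical LoRA factors for a down_proj with input width 64, output width 32, rank 8.
hidden, out_dim, rank = 64, 32, 8
lora_A = np.zeros((hidden, rank), dtype=np.float16)   # lora_A.shape[1] == rank (8)
lora_B = np.zeros((rank, out_dim), dtype=np.float16)  # lora_B.shape[1] == out_dim (32)

# The zero chunk for an expert that received no tokens must match the expert's
# output width, i.e. lora_B.shape[1], not the LoRA rank taken from lora_A.
chunk = np.zeros([1, lora_B.shape[1]], dtype=lora_B.dtype)
assert chunk.shape == (1, out_dim)
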
paddleformers/transformers/configuration_utils.py

Lines changed: 7 additions & 1 deletion

@@ -284,8 +284,14 @@ class LlmMetaConfig:
     ]
 
     loss_attributes = [
-        ("use_fused_head_loss_fn", bool, False, "Whether to use fused head and loss function."),
+        ("use_fused_head_and_loss_fn", bool, False, "Whether to use fused head and loss function."),
         ("use_filtered_label_loss", bool, False, "Whether to use filtered label loss."),
+        (
+            "use_sparse_head_and_loss_fn",
+            bool,
+            False,
+            "Maintained for compatibility, recommend using use_filtered_label_loss instead. (Legacy params)",
+        ),
         (
             "loss_subbatch_sequence_length",
             int,

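Context for the configuration change: the registry holds (name, type, default, help) tuples; the commit corrects the attribute name to use_fused_head_and_loss_fn and re-registers use_sparse_head_and_loss_fn as a legacy, compatibility-only flag alongside use_filtered_label_loss. A toy illustration of how such attribute tuples could be applied to a config object; this is a sketch, not the actual LlmMetaConfig machinery:

# Sketch only: hypothetical helper, not code from configuration_utils.py.
loss_attributes = [
    ("use_fused_head_and_loss_fn", bool, False, "Whether to use fused head and loss function."),
    ("use_filtered_label_loss", bool, False, "Whether to use filtered label loss."),
    ("use_sparse_head_and_loss_fn", bool, False, "Legacy alias; prefer use_filtered_label_loss."),
]

class DummyConfig:
    pass

config = DummyConfig()
for name, _dtype, default, _help in loss_attributes:
    setattr(config, name, default)  # register each flag with its default value

assert config.use_sparse_head_and_loss_fn is False
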
paddleformers/transformers/model_utils.py

Lines changed: 1 addition & 1 deletion

@@ -397,7 +397,7 @@ def _is_need_transpose(key):
 
 def _transpose_hf_weight(key, weight):
     if _is_need_transpose(key):
-        return weight.transpose([-1, -2])
+        return np.ascontiguousarray(weight.transpose([-1, -2]))
     return weight
 
 part_state_dict = {}

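Context for the model_utils change: the weight here is presumably a NumPy array (the diff wraps it in np.ascontiguousarray), and numpy's transpose returns a strided view rather than a contiguous copy, so the fix materializes a C-contiguous buffer before the transposed weight reaches downstream consumers that expect one. A small standalone demonstration of the behavior, independent of the surrounding loading code:

import numpy as np

w = np.arange(12, dtype=np.float32).reshape(3, 4)
wt = w.transpose([-1, -2])            # a view with swapped strides
print(wt.flags["C_CONTIGUOUS"])       # False: data is not laid out row-major
 
wt_c = np.ascontiguousarray(wt)       # copies into a C-contiguous buffer
print(wt_c.flags["C_CONTIGUOUS"])     # True
print(np.array_equal(wt, wt_c))       # True: same values, different memory layout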