Skip to content

Commit 04cdc2e

Browse files
committed
DS has poor default
1 parent dcb2d61 commit 04cdc2e

File tree

1 file changed

+2
-0
lines changed

1 file changed

+2
-0
lines changed

megatron/model/shared_t5_model.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -83,6 +83,7 @@ def _to_16bit(inputs):
 layer_type=LayerType.encoder,
 layer_number=layer_idx,
 self_attn_mask_type=AttnMaskType.causal,
+tied_weight_attr=None,
 tied_weight_attrs=["self_attention", "mlp"]
 ))

@@ -108,6 +109,7 @@ def _to_16bit(inputs):
 layer_number=layer_idx,
 layer_type=LayerType.decoder,
 self_attn_mask_type=AttnMaskType.padding,
+tied_weight_attr=None,
 tied_weight_attrs=["self_attention", "mlp"]
 )
 )

0 commit comments

Comments
 (0)