
Commit 15398e0

chtruong814 and ko3n1g authored
fix: Remove experimental_attention_variant arg from get_gpt_layer call in mlm_compat (#2073)
Signed-off-by: Charlie Truong <[email protected]>
Co-authored-by: oliver könig <[email protected]>
1 parent 59cf515 commit 15398e0
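
For context, the fix reflects ordinary Python keyword-argument behavior: if the pinned Megatron-LM spec builder no longer accepts experimental_attention_variant, forwarding it raises a TypeError at call time, so the compat shim simply stops passing it. A minimal sketch under that assumption, using a hypothetical stand-in (build_layer_spec is not the actual Megatron-LM function):

    # Hypothetical stand-in for an upstream spec builder that no longer knows the kwarg.
    def build_layer_spec(*, qk_layernorm=False, multi_latent_attention=False):
        return {"qk_layernorm": qk_layernorm, "multi_latent_attention": multi_latent_attention}

    kwargs = {
        "qk_layernorm": True,
        "multi_latent_attention": False,
        # Re-adding the next line reproduces the failure mode this commit avoids:
        # TypeError: build_layer_spec() got an unexpected keyword argument ...
        # "experimental_attention_variant": None,
    }
    print(build_layer_spec(**kwargs))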

File tree

3 files changed: +1 -5 lines changed

src/megatron/bridge/training/mlm_compat/model.py

Lines changed: 0 additions & 2 deletions
@@ -59,7 +59,6 @@ def _get_transformer_layer_spec(args: argparse.Namespace, use_te: bool, use_kitc
             moe_grouped_gemm=args.moe_grouped_gemm,
             qk_layernorm=args.qk_layernorm,
             multi_latent_attention=args.multi_latent_attention,
-            experimental_attention_variant=getattr(args, "experimental_attention_variant", None),
             moe_use_legacy_grouped_gemm=args.moe_use_legacy_grouped_gemm,
             qk_l2_norm=args.qk_l2_norm,
             use_kitchen=use_kitchen,
@@ -70,7 +69,6 @@ def _get_transformer_layer_spec(args: argparse.Namespace, use_te: bool, use_kitc
             moe_grouped_gemm=args.moe_grouped_gemm,
             qk_layernorm=args.qk_layernorm,
             multi_latent_attention=args.multi_latent_attention,
-            experimental_attention_variant=getattr(args, "experimental_attention_variant", None),
             moe_use_legacy_grouped_gemm=args.moe_use_legacy_grouped_gemm,
             normalization=args.normalization,
             use_kitchen=use_kitchen,

tests/unit_tests/Launch_Unit_Tests.sh

Lines changed: 1 addition & 1 deletion
@@ -26,7 +26,7 @@ fi
 echo ""

 CUDA_VISIBLE_DEVICES="0,1" uv run coverage run -a --data-file=/opt/Megatron-Bridge/.coverage --source=/opt/Megatron-Bridge/ -m pytest \
-    --timeout=0.75 \
+    --timeout=1 \
     -o log_cli=true \
     -o log_cli_level=INFO \
     --disable-warnings \

tests/unit_tests/training/mlm_compat/test_model.py

Lines changed: 0 additions & 2 deletions
@@ -105,7 +105,6 @@ def test_te_spec(self, mock_te_spec_func, mock_args):
             moe_grouped_gemm=False,
             qk_layernorm=False,
             multi_latent_attention=False,
-            experimental_attention_variant=None,
             moe_use_legacy_grouped_gemm=False,
             qk_l2_norm=False,
             use_kitchen=False,
@@ -121,7 +120,6 @@ def test_local_spec(self, mock_local_spec_func, mock_args):
             moe_grouped_gemm=False,
             qk_layernorm=False,
             multi_latent_attention=False,
-            experimental_attention_variant=None,
             moe_use_legacy_grouped_gemm=False,
             normalization="LayerNorm",
             use_kitchen=True,

0 commit comments
