Skip to content

Commit cd89d4f

Browse files
committed
Clean up after rebase
1 parent bfb9dd4 commit cd89d4f

File tree

3 files changed

+1
-10
lines changed

3 files changed

+1
-10
lines changed

megatron/core/fp8_utils.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -168,7 +168,7 @@ def _get_custom_recipe(quantizer_factory_python_path: str) -> Union[Fp8Recipe, F
168168
def get_fp8_align_size(fp8_recipe: Fp8Recipe) -> int:
169169
"""Get the alignment size required for fp8 GEMM."""
170170
if fp8_recipe == Fp8Recipe.mxfp8:
171-
return 128
171+
return 32
172172
else:
173173
return 16
174174

megatron/core/transformer/transformer_config.py

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -690,10 +690,6 @@ class TransformerConfig(ModelParallelConfig):
690690
GEMM feature introduced since CUTLASS 2.8 (https://github.com/fanshiqing/grouped_gemm).
691691
"""
692692

693-
moe_use_device_initiated_grouped_gemm: bool = False
694-
"""Use the cutlass grouped gemm kernel, which allows for the token_per_expert tensor on GPU.
695-
This can prevent the GPU-CPU synchronization during the grouped gemm."""
696-
697693
moe_use_legacy_grouped_gemm: bool = False
698694
"""Use legacy GroupedMLP rather than TEGroupedMLP.
699695
Note: The legacy one will be deprecated soon."""

tests/unit_tests/transformer/moe/test_paged_stashing.py

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -111,9 +111,6 @@ def __init__(
111111
moe_permute_fusion=kwargs.get("moe_permute_fusion", False),
112112
moe_flex_dispatcher_backend=kwargs.get("moe_flex_dispatcher_backend", None),
113113
moe_grouped_gemm=kwargs.get("moe_grouped_gemm", False),
114-
moe_use_device_initiated_grouped_gemm=kwargs.get(
115-
"moe_use_device_initiated_grouped_gemm", False
116-
),
117114
moe_use_legacy_grouped_gemm=kwargs.get("moe_use_legacy_grouped_gemm", False),
118115
moe_paged_stash=kwargs.get("moe_paged_stash", False),
119116
stash_modules=kwargs.get("stash_modules", None),
@@ -224,7 +221,6 @@ def test_forward_backward_4_layers(self):
224221
moe_flex_dispatcher_backend="hybridep",
225222
test_dtype=torch.bfloat16,
226223
moe_grouped_gemm=True,
227-
moe_use_device_initiated_grouped_gemm=True,
228224
moe_use_legacy_grouped_gemm=False,
229225
moe_paged_stash=True,
230226
stash_modules=["expert_fc1", "moe_act", "expert_fc2"],
@@ -318,7 +314,6 @@ def test_overload_factor_and_over_budget(self):
318314
moe_flex_dispatcher_backend="hybridep",
319315
test_dtype=torch.bfloat16,
320316
moe_grouped_gemm=True,
321-
moe_use_device_initiated_grouped_gemm=True,
322317
moe_use_legacy_grouped_gemm=False,
323318
moe_paged_stash=True,
324319
stash_modules=["expert_fc1", "moe_act", "expert_fc2"],

0 commit comments

Comments (0)