Skip to content

Commit 1045e6f

Browse files
committed
reduce test memory usage
Signed-off-by: Dongfeng Yu <[email protected]>
1 parent 7de4bdd commit 1045e6f

File tree

1 file changed

+8
-3
lines changed

1 file changed

+8
-3
lines changed

tests/unittest/_torch/modules/test_fused_moe.py

Lines changed: 8 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -1426,7 +1426,7 @@ def test_fused_moe_nvfp4_gptoss_style(hidden_size, intermediate_size,
14261426
intermediate_size=intermediate_size,
14271427
num_experts=32,
14281428
top_k=4,
1429-
seq_len=8192,
1429+
seq_len=256,
14301430
gptoss_style=True,
14311431
swiglu_alpha=swiglu_alpha,
14321432
swiglu_beta=swiglu_beta,
@@ -1621,8 +1621,13 @@ def run_fused_moe_nvfp4(dtype,
16211621
with torch.inference_mode():
16221622
ref_output = ref_fused_moe.forward(x, router_logits)
16231623

1624-
with torch.inference_mode(), autotune():
1625-
fused_moe.forward(x, router_logits)
1624+
if not gptoss_style:
1625+
with torch.inference_mode(), autotune():
1626+
fused_moe.forward(x, router_logits)
1627+
else:
1628+
# We skip autotune for gptoss style to reduce memory usage since the input shape is already quite large.
1629+
with torch.inference_mode():
1630+
fused_moe.forward(x, router_logits)
16261631

16271632
output = fused_moe.forward(x, router_logits)
16281633

0 commit comments

Comments
 (0)