We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
1 parent e72e16c, commit 6836bf1 (Copy full SHA for 6836bf1)
lightllm/common/fused_moe/moe_silu_and_mul.py
@@ -72,7 +72,7 @@ def _silu_and_mul_kernel_fast(
72
for bm in [32, 64, 128, 256]
73
for bn in [32, 64, 128, 256]
74
],
75
- default_config={"BLOCK_M": 128, "BLOCK_N": 128, "num_warps": 4, "num_stages": 1},
+ default_config={"BLOCK_M": 128, "BLOCK_N": 128, "num_warps": 4, "NUM_STAGES": 1},
76
static_key_func=lambda input, output: f"N={input.shape[-1] // 2},out_dtype={output.dtype}",
77
run_key_func=lambda input: str(nearest_power_of_2(input.shape[0])),
78
)
0 commit comments