
Commit 9461d73

rename StandardPrepareAndFinalize to MoEPrepareAndFinalizeNoEP
Signed-off-by: Bill Nell <[email protected]>
1 parent: d6e801e

4 files changed: +8 −8 lines changed

vllm/model_executor/layers/fused_moe/cutlass_moe.py

Lines changed: 2 additions & 2 deletions

@@ -8,7 +8,7 @@
 import vllm.model_executor.layers.fused_moe.modular_kernel as mk
 from vllm import _custom_ops as ops
 from vllm.model_executor.layers.fused_moe.prepare_finalize import (
-    StandardPrepareAndFinalize)
+    MoEPrepareAndFinalizeNoEP)
 from vllm.model_executor.layers.fused_moe.utils import _fp8_perm, _resize_cache
 from vllm.scalar_type import scalar_types

@@ -241,7 +241,7 @@ def cutlass_moe_fp8(
         a2_scale.numel() != 1 if a2_scale is not None else False)

     fn = mk.FusedMoEModularKernel(
-        StandardPrepareAndFinalize(
+        MoEPrepareAndFinalizeNoEP(
             per_channel_quant=per_act_token,
             quant_dtype=torch.float8_e4m3fn,
         ),

vllm/model_executor/layers/fused_moe/deep_gemm_moe.py

Lines changed: 3 additions & 3 deletions

@@ -10,7 +10,7 @@
 from vllm.model_executor.layers.fused_moe.moe_permute_unpermute import (
     _moe_permute)
 from vllm.model_executor.layers.fused_moe.prepare_finalize import (
-    StandardPrepareAndFinalize)
+    MoEPrepareAndFinalizeNoEP)
 from vllm.model_executor.layers.fused_moe.utils import (_fp8_quantize,
                                                         _resize_cache)
 from vllm.utils import round_up

@@ -205,8 +205,8 @@ def deep_gemm_moe_fp8(
     - torch.Tensor: The bfloat16 output tensor after applying the MoE layer.
     """
     fn = mk.FusedMoEModularKernel(
-        StandardPrepareAndFinalize(quant_dtype=torch.float8_e4m3fn,
-                                   block_shape=deep_gemm_block_shape()),
+        MoEPrepareAndFinalizeNoEP(quant_dtype=torch.float8_e4m3fn,
+                                  block_shape=deep_gemm_block_shape()),
         DeepGemmExperts(),
     )
     return fn(

vllm/model_executor/layers/fused_moe/fused_moe.py

Lines changed: 2 additions & 2 deletions

@@ -16,7 +16,7 @@
 from vllm.model_executor.layers.fused_moe.moe_align_block_size import (
     moe_align_block_size)
 from vllm.model_executor.layers.fused_moe.prepare_finalize import (
-    StandardPrepareAndFinalize)
+    MoEPrepareAndFinalizeNoEP)
 from vllm.model_executor.layers.fused_moe.utils import (
     _resize_cache, moe_kernel_quantize_input)
 from vllm.platforms import current_platform

@@ -1706,7 +1706,7 @@ def modular_triton_fused_moe(
         use_int4_w4a16=use_int4_w4a16,
     )
     return mk.FusedMoEModularKernel(
-        StandardPrepareAndFinalize(
+        MoEPrepareAndFinalizeNoEP(
             quant_dtype=qtype,
             per_channel_quant=per_channel_quant,
             block_shape=block_shape,

vllm/model_executor/layers/fused_moe/prepare_finalize.py

Lines changed: 1 addition & 1 deletion

@@ -10,7 +10,7 @@
     moe_kernel_quantize_input)


-class StandardPrepareAndFinalize(mk.FusedMoEPrepareAndFinalize):
+class MoEPrepareAndFinalizeNoEP(mk.FusedMoEPrepareAndFinalize):

     def __init__(
         self,
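
For reviewers tracing the rename, a minimal sketch of how the renamed class is composed after this commit, assembled from the call sites in the hunks above. The constructor arguments and the pairing with DeepGemmExperts are taken directly from the deep_gemm_moe.py hunk; the import path for DeepGemmExperts and deep_gemm_block_shape is an assumption inferred from the file paths shown, not something this diff confirms.

# Composition sketch based on the call sites changed in this commit.
# Assumption: DeepGemmExperts and deep_gemm_block_shape are importable from
# deep_gemm_moe.py (inferred from the file paths above, not shown in the diff).
import torch

import vllm.model_executor.layers.fused_moe.modular_kernel as mk
from vllm.model_executor.layers.fused_moe.deep_gemm_moe import (
    DeepGemmExperts, deep_gemm_block_shape)
from vllm.model_executor.layers.fused_moe.prepare_finalize import (
    MoEPrepareAndFinalizeNoEP)

# Prepare/finalize stage: quantizes activations to fp8 with DeepGEMM's block
# shape. The "NoEP" suffix suggests it performs no expert-parallel
# communication, in contrast to EP-aware prepare/finalize implementations.
prepare_finalize = MoEPrepareAndFinalizeNoEP(
    quant_dtype=torch.float8_e4m3fn,
    block_shape=deep_gemm_block_shape(),
)

# The modular kernel pairs the prepare/finalize stage with an experts
# implementation, exactly as deep_gemm_moe_fp8 does in the hunk above.
fn = mk.FusedMoEModularKernel(prepare_finalize, DeepGemmExperts())

The same pattern appears in cutlass_moe.py (with per_channel_quant=per_act_token) and fused_moe.py (with quant_dtype=qtype and block_shape=block_shape): only the class name changes in this commit, not the composition.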

0 commit comments