Commit 448658a

more lint stuff

Signed-off-by: Bill Nell <[email protected]>
1 parent b6ae861 commit 448658a
File tree: 2 files changed, +12 −3 lines
vllm/model_executor/layers/fused_moe/layer.py
Lines changed: 5 additions & 0 deletions

@@ -32,6 +32,7 @@
     from .fused_batched_moe import BatchedDispatchCombine, BatchedTritonExperts
     from .fused_moe import TritonExperts, fused_experts
     from .modular_kernel import (FusedMoEModularKernel,
+                                 FusedMoEPermuteExpertsUnpermute,
                                  FusedMoEQuantizeDispatchCombine)
     from .pplx_dispatch_combine import PplxDispatchCombine
 else:

@@ -249,6 +250,8 @@ def set_dispatch_combine(

         #block_m = MOE_DP_CHUNK_SIZE * (self.moe.ep_size // self.moe.dp_size)

+        experts: FusedMoEPermuteExpertsUnpermute = None
+
         if isinstance(dispatch_combine,
                       (BatchedDispatchCombine, PplxDispatchCombine)):
             logger.info("BatchedTritonExperts %s", self.moe)

@@ -619,6 +622,8 @@ def __init__(
         assert quant_method is not None
         self.quant_method = quant_method

+        dispatch_combine: FusedMoEQuantizeDispatchCombine = None
+
         # TODO: move to method?
         if self.dp_size > 1:
             logger.info("using pplx dispatch")

vllm/model_executor/layers/fused_moe/triton_deep_gemm_moe.py
Lines changed: 7 additions & 3 deletions

@@ -6,21 +6,25 @@
 import vllm.model_executor.layers.fused_moe.modular_kernel as mk
 from vllm.model_executor.layers.fused_moe.deep_gemm_moe import (
     DeepGemmExperts, _valid_deep_gemm, _valid_deep_gemm_shape)
-from vllm.model_executor.layers.fused_moe.fused_moe import TritonExpert
+from vllm.model_executor.layers.fused_moe.fused_moe import TritonExperts


 class TritonOrDeepGemmExperts(mk.FusedMoEPermuteExpertsUnpermute):

     def __init__(self,
                  use_fp8_w8a8: bool,
+                 use_int8_w8a8: bool,
                  use_int8_w8a16: bool,
                  use_int4_w4a16: bool,
+                 per_channel_quant: bool,
                  block_shape: Optional[List[int]] = None,
                  block_m: Optional[int] = None,
                  allow_deep_gemm: bool = False):
         super().__init__()
-        self.triton_expert = TritonExpert(use_fp8_w8a8, use_int4_w4a16,
-                                          use_int8_w8a16, block_shape, block_m)
+        self.triton_expert = TritonExperts(use_fp8_w8a8, use_int8_w8a8,
+                                           use_int4_w4a16, use_int8_w8a16,
+                                           per_channel_quant, block_shape,
+                                           block_m)
         self.deep_gemm_expert = DeepGemmExperts()
         self.allow_deep_gemm = allow_deep_gemm
         self.use_fp8_w8a8 = use_fp8_w8a8
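This hunk fixes the class name (TritonExpert → TritonExperts) and threads the two new flags, use_int8_w8a8 and per_channel_quant, through to the wrapped Triton implementation. For context, a hedged usage sketch of the updated constructor, based only on the signature shown in this diff; the flag values and block shape are illustrative, not defaults taken from the codebase, and the import path simply mirrors the file location.

from vllm.model_executor.layers.fused_moe.triton_deep_gemm_moe import (
    TritonOrDeepGemmExperts)

experts = TritonOrDeepGemmExperts(
    use_fp8_w8a8=True,        # example: fp8 weights and activations
    use_int8_w8a8=False,      # new flag threaded through in this commit
    use_int8_w8a16=False,
    use_int4_w4a16=False,
    per_channel_quant=False,  # new flag threaded through in this commit
    block_shape=[128, 128],   # example block-quantization shape
    block_m=None,
    allow_deep_gemm=True,     # also builds DeepGemmExperts as an alternative path
)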
