Commit 58eca40

fix import
1 parent f79f96f commit 58eca40

File tree

1 file changed: +4 −7 lines changed


test/benchmark/kernel/benchmark_fused_moe_triton.py

Lines changed: 4 additions & 7 deletions
@@ -8,10 +8,6 @@
 from transformers import AutoConfig
 from lightllm.common.fused_moe.topk_select import select_experts
 from lightllm.common.fused_moe.grouped_fused_moe import fused_experts_impl
-from vllm.model_executor.layers.fused_moe.fused_moe import fused_moe as fused_moe_vllm
-from sglang.srt.layers.moe.fused_moe_triton.fused_moe import (
-    fused_moe as fused_moe_sglang,
-)
 
 
 def get_model_config(model_name: str, tp_size: int):
@@ -59,12 +55,10 @@ def get_model_config(model_name: str, tp_size: int):
     intermediate_size = config.intermediate_size
     shard_intermediate_size = 2 * intermediate_size // tp_size
 
-    vllm_version_num = vllm.__version_tuple__[0] * 100 + vllm.__version_tuple__[1] * 10 + vllm.__version_tuple__[2]
     block_shape = None
     if hasattr(config, "quantization_config") and "weight_block_size" in config.quantization_config:
         block_shape = config.quantization_config["weight_block_size"]
         assert len(block_shape) == 2
-        assert vllm_version_num >= 66, "Block-wise quantized fp8 fused_moe is only supported for VLLM>=0.6.6.post1"
 
     shape_configs = {
         "num_experts": E,
@@ -131,6 +125,7 @@ def fused_moe_vllm_api(
     a2_scale=None,
     block_shape=None,
 ):
+    from vllm.model_executor.layers.fused_moe.fused_moe import fused_moe as fused_moe_vllm
     if block_shape is not None:
         return fused_moe_vllm(
             x,
@@ -179,7 +174,9 @@ def fused_moe_sglang_api(
 ):
     from sglang.srt.layers.moe.moe_runner import MoeRunnerConfig
     from sglang.srt.layers.moe.topk import TopK, TopKConfig, select_experts
-
+    from sglang.srt.layers.moe.fused_moe_triton.fused_moe import (
+        fused_moe as fused_moe_sglang,
+    )
     topk_output = select_experts(
         hidden_states=x,
         router_logits=input_gating,
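
For context, a minimal sketch of the deferred-import pattern this commit applies: the optional vllm and sglang backends are imported inside the benchmark wrapper functions rather than at module scope, so the script still imports when only one (or neither) backend is installed. Only the import paths below come from the diff; the pass-through function bodies are illustrative, not the file's actual signatures.

# Sketch: defer optional backend imports into the functions that use them,
# so importing the benchmark module does not require vllm or sglang.

def fused_moe_vllm_api(*args, **kwargs):
    # vllm is only required when this path is actually benchmarked.
    from vllm.model_executor.layers.fused_moe.fused_moe import fused_moe as fused_moe_vllm

    return fused_moe_vllm(*args, **kwargs)


def fused_moe_sglang_api(*args, **kwargs):
    # Same idea for the sglang backend.
    from sglang.srt.layers.moe.fused_moe_triton.fused_moe import (
        fused_moe as fused_moe_sglang,
    )

    return fused_moe_sglang(*args, **kwargs)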
