Skip to content

Commit ddb0641

Browse files
committed
[Qwen-moe] use npu_add_rms_norm_quant operator
Signed-off-by: s30076806 <[email protected]>
1 parent 0f7492d commit ddb0641

File tree

1 file changed

+15
-3
lines changed

1 file changed

+15
-3
lines changed

vllm_ascend/models/qwen3_moe.py

Lines changed: 15 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -50,7 +50,9 @@
5050
from vllm_ascend.ops.fused_moe import AscendFusedMoE
5151
from vllm_ascend.ops.sequence_parallel import (MetadataForPadding,
5252
init_metadata_for_sp)
53-
53+
from vllm_ascend.ops.layernorm import AddRMSNormW8A8Quant
54+
from vllm_ascend.quantization.w8a8 import AscendW8A8LinearMethod
55+
from vllm_ascend.quantization.quant_config import AscendQuantConfig
5456

5557
class CustomSparseMoeBlock(Qwen3MoeSparseMoeBlock):
5658

@@ -183,8 +185,18 @@ def __init__(
183185
hidden_act=config.hidden_act,
184186
quant_config=quant_config,
185187
prefix=f"{prefix}.mlp")
186-
self.input_layernorm = RMSNorm(config.hidden_size,
187-
eps=config.rms_norm_eps)
188+
189+
assert isinstance(quant_config, AscendQuantConfig), \
190+
"Expected quant_config to be an instance of AscendQuantConfig"
191+
if isinstance(self.self_attn.qkv_proj.quant_method.quant_method,
192+
AscendW8A8LinearMethod):
193+
self.input_layernorm = AddRMSNormW8A8Quant(
194+
config.hidden_size,
195+
layer=self.self_attn.qkv_proj,
196+
eps=config.rms_norm_eps)
197+
else:
198+
self.input_layernorm = RMSNorm(config.hidden_size,
199+
eps=config.rms_norm_eps)
188200
self.post_attention_layernorm = RMSNorm(config.hidden_size,
189201
eps=config.rms_norm_eps)
190202

0 commit comments

Comments (0)