Skip to content

Commit d0bb140

Browse files
Hexq0210 and McZyWu authored
[NPU] bugfix for model Qwen3-Coder-Next at weight shape transpose for npu. (sgl-project#18700)
Co-authored-by: McZyWu <zhuoyun.wu.23@ucl.ac.uk>
1 parent a1b39c1 commit d0bb140

File tree

2 files changed

+3
-3
lines changed

2 files changed

+3
-3
lines changed

python/sglang/srt/hardware_backend/npu/quantization/fused_moe_method_npu.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -118,7 +118,7 @@ def npu_fused_moe_without_routing_weights_bf16(
118118
# gmm1: gate_up_proj
119119
hidden_states = torch.ops.npu.npu_grouped_matmul(
120120
x=[hidden_states],
121-
weight=[layer.w13_weight.permute(0, 2, 1)],
121+
weight=[layer.w13_weight],
122122
split_item=2,
123123
group_list_type=group_list_type,
124124
group_type=0,
@@ -129,7 +129,7 @@ def npu_fused_moe_without_routing_weights_bf16(
129129
# gmm2: down_proj
130130
hidden_states = torch.ops.npu.npu_grouped_matmul(
131131
x=[hidden_states],
132-
weight=[layer.w2_weight.permute(0, 2, 1)],
132+
weight=[layer.w2_weight],
133133
split_item=2,
134134
group_list_type=group_list_type,
135135
group_type=0,

python/sglang/srt/layers/attention/hybrid_linear_attn_backend.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -46,7 +46,7 @@
4646
from sglang.srt.utils import cpu_has_amx_support, is_cpu, is_cuda, is_npu
4747
from sglang.srt.utils.common import rank0_log
4848

49-
if not is_cpu() and not is_npu():
49+
if not is_cpu():
5050
# fix import error on CPU device, no impacts when non-CPU path
5151
try:
5252
from sglang.jit_kernel.cutedsl_gdn import (

0 commit comments

Comments (0)