Skip to content

Commit b44d392

Browse files
yingxudeng, liutongxuan
authored and committed
bugfix: resolve qwen3-moe quantization inference errors with TP > 4.
1 parent 3b7e1a2 commit b44d392

File tree

1 file changed

+3
-1
lines changed

1 file changed

+3
-1
lines changed

xllm/core/layers/npu/npu_qwen3_moe_decoder_layer_impl.cpp

Lines changed: 3 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -557,7 +557,9 @@ void NpuQwen3MoeDecoderLayerImpl::process_general_weights(
557 557

558 558
if (index == IN_QKV_WEIGHT_1 || index == IN_QKV_WEIGHT_2 ||
559 559
index == IN_QKV_BIAS_1 || index == IN_QKV_BIAS_2 ||
560-
index == IN_QKV_DESCALE_1 || index == IN_QKV_DESCALE_2) {
560+
index == IN_QKV_DESCALE_1 || index == IN_QKV_DESCALE_2 ||
561+
index == IN_QKV_OFFSET_1 || index == IN_QKV_OFFSET_2 ||
562+
index == IN_QKV_SCALE_1 || index == IN_QKV_SCALE_2) {
561 563
if (n_kv_heads_ < dp_local_tp_size_) {
562 564
int32_t repeat_times = (dp_local_tp_size_ / n_kv_heads_);
563 565

0 commit comments

Comments
 (0)